Source code for hail.vds.functions

import hail as hl
from hail.expr.expressions import expr_any, expr_array, expr_call, expr_int32
from hail.expr.functions import _func
from hail.typecheck import enumeration, typecheck


[docs]@typecheck(lgt=expr_call, la=expr_array(expr_int32)) def lgt_to_gt(lgt, la): """Transform LGT into GT using local alleles array. Parameters ---------- lgt : :class:`.CallExpression` LGT value. la : :class:`.ArrayExpression` Local alleles array. Returns ------- :class:`.CallExpression` """ return hl.rbind(lgt, lambda lgt: hl.if_else(lgt.is_non_ref(), _func("lgt_to_gt", hl.tcall, lgt, la), lgt))
[docs]@typecheck( array=expr_array(), local_alleles=expr_array(expr_int32), n_alleles=expr_int32, fill_value=expr_any, number=enumeration('A', 'R', 'G'), ) def local_to_global(array, local_alleles, n_alleles, fill_value, number): """Reindex a locally-indexed array to globally-indexed. Examples -------- >>> local_alleles = hl.array([0, 2]) >>> local_ad = hl.array([9, 10]) >>> local_pl = hl.array([94, 0, 123]) >>> hl.eval(local_to_global(local_ad, local_alleles, n_alleles=3, fill_value=0, number='R')) [9, 0, 10] >>> hl.eval(local_to_global(local_pl, local_alleles, n_alleles=3, fill_value=999, number='G')) [94, 999, 999, 0, 999, 123] Notes ----- The `number` parameter matches the `VCF specification <https://samtools.github.io/hts-specs/VCFv4.3.pdf>`__ number definitions: - ``A`` indicates one value per allele, excluding the reference. - ``R`` indicates one value per allele, including the reference. - ``G`` indicates one value per unique diploid genotype. Warning ------- Using this function can lead to an enormous explosion in data size, without increasing information capacity. Its appropriate use is to conform to antiquated and badly-scaling representations (e.g. pVCF), but even so, caution should be exercised. Reindexing local PLs (or any G-numbered field) at a site with 1000 alleles will produce an array with more than 5,000 values per sample -- with 100,000 samples, nearly 50GB per variant! See Also -------- :func:`.lgt_to_gt` Parameters ---------- array : :class:`.ArrayExpression` Array to reindex. local_alleles : :class:`.ArrayExpression` Local alleles array. n_alleles : :class:`.Int32Expression` Total number of alleles to reindex to. fill_value Value to fill in at global indices with no data. number : :class:`str` One of 'A', 'R', 'G'. Returns ------- :class:`.ArrayExpression` """ try: fill_value = hl.coercer_from_dtype(array.dtype.element_type).coerce(fill_value) except Exception as e: raise ValueError(f'fill_value type {fill_value.dtype} is incompatible with array type {array.dtype}') from e if number == 'G': return _func("local_to_global_g", array.dtype, array, local_alleles, n_alleles, fill_value) elif number == 'R': omit_first = False elif number == 'A': omit_first = True else: raise ValueError(f'unrecognized number {number}') return _func("local_to_global_a_r", array.dtype, array, local_alleles, n_alleles, fill_value, hl.bool(omit_first))