Source code for hail.genetics.call

from collections.abc import Sequence

from hail.typecheck import typecheck_method


[docs]class Call(object): """ An object that represents an individual's call at a genomic locus. Parameters ---------- alleles : :obj:`list` of :obj:`int` List of alleles that compose the call. phased : :obj:`bool` If ``True``, the alleles are phased and the order is specified by `alleles`. Note ---- This object refers to the Python value returned by taking or collecting Hail expressions, e.g. ``mt.GT.take(5`)``. This is rare; it is much more common to manipulate the :class:`.CallExpression` object, which is constructed using the following functions: - :func:`.call` - :func:`.unphased_diploid_gt_index_call` - :func:`.parse_call` """ def __init__(self, alleles, phased=False): # Intentionally not using the type check annotations which are too slow. assert isinstance(alleles, Sequence) assert isinstance(phased, bool) if len(alleles) > 2: raise NotImplementedError("Calls with greater than 2 alleles are not supported.") self._phased = phased ploidy = len(alleles) if phased or ploidy < 2: self._alleles = alleles else: assert ploidy == 2 a0 = alleles[0] a1 = alleles[1] if a1 < a0: a0, a1 = a1, a0 self._alleles = [a0, a1] def __str__(self): n = self.ploidy if n == 0: if self._phased: return '|-' return '-' if n == 1: if self._phased: return f'|{self._alleles[0]}' return str(self._alleles[0]) assert n == 2 a0 = self._alleles[0] a1 = self._alleles[1] if self._phased: return f'{a0}|{a1}' return f'{a0}/{a1}' def __repr__(self): return 'Call(alleles=%s, phased=%s)' % (self._alleles, self._phased) def __eq__(self, other): return ( (self._phased == other._phased and self._alleles == other._alleles) if isinstance(other, Call) else NotImplemented ) def __hash__(self): return hash(self._phased) ^ hash(tuple(self._alleles)) def __getitem__(self, item): """Get the i*th* allele. Returns ------- :obj:`int` """ return self._alleles[item] @property def alleles(self) -> Sequence[int]: """Get the alleles of this call. Returns ------- :obj:`list` of :obj:`int` """ return self._alleles @property def ploidy(self): """The number of alleles for this call. Returns ------- :obj:`int` """ return len(self._alleles) @property def phased(self): """True if the call is phased. Returns ------- :obj:`bool` """ return self._phased
[docs] def is_haploid(self): """True if the ploidy == 1. :rtype: bool """ return self.ploidy == 1
[docs] def is_diploid(self): """True if the ploidy == 2. :rtype: bool """ return self.ploidy == 2
[docs] def is_hom_ref(self): """True if the call has no alternate alleles. :rtype: bool """ if self.ploidy == 0: return False return all(a == 0 for a in self._alleles)
[docs] def is_het(self): """True if the call contains two different alleles. :rtype: bool """ if self.ploidy < 2: return False return self._alleles[0] != self._alleles[1]
[docs] def is_hom_var(self): """True if the call contains identical alternate alleles. :rtype: bool """ n = self.ploidy if n == 0: return False a0 = self._alleles[0] if a0 == 0: return False if n == 1: return True assert n == 2 return self._alleles[1] == a0
[docs] def is_non_ref(self): """True if the call contains any non-reference alleles. :rtype: bool """ return any(a > 0 for a in self._alleles)
[docs] def is_het_non_ref(self): """True if the call contains two different alternate alleles. :rtype: bool """ n = self.ploidy if n < 2: return False assert n == 2 a0 = self._alleles[0] a1 = self._alleles[1] return a0 > 0 and a1 > 0 and a0 != a1
[docs] def is_het_ref(self): """True if the call contains one reference and one alternate allele. :rtype: bool """ n = self.ploidy if n < 2: return False assert n == 2 a0 = self._alleles[0] a1 = self._alleles[1] return (a0 == 0 and a1 > 0) or (a0 > 0 and a1 == 0)
[docs] def n_alt_alleles(self): """Returns the count of non-reference alleles. :rtype: int """ n = 0 for a in self._alleles: if a > 0: n += 1 return n
[docs] @typecheck_method(n_alleles=int) def one_hot_alleles(self, n_alleles): """Returns a list containing the one-hot encoded representation of the called alleles. Examples -------- >>> n_alleles = 2 >>> hom_ref = hl.Call([0, 0]) >>> het = hl.Call([0, 1]) >>> hom_var = hl.Call([1, 1]) >>> het.one_hot_alleles(n_alleles) [1, 1] >>> hom_var.one_hot_alleles(n_alleles) [0, 2] Notes ----- This one-hot representation is the positional sum of the one-hot encoding for each called allele. For a biallelic variant, the one-hot encoding for a reference allele is [1, 0] and the one-hot encoding for an alternate allele is [0, 1]. Parameters ---------- n_alleles : :obj:`int` Number of total alleles, including the reference. Returns ------- :obj:`list` of :obj:`int` """ r = [0] * n_alleles for a in self._alleles: r[a] += 1 return r
[docs] def unphased_diploid_gt_index(self): """Return the genotype index for unphased, diploid calls. Returns ------- :obj:`int` """ from hail.utils import FatalError if self.ploidy != 2 or self.phased: raise FatalError( "'unphased_diploid_gt_index' is only valid for unphased, diploid calls. Found {}.".format(repr(self)) ) a0 = self._alleles[0] a1 = self._alleles[1] assert a0 <= a1 return a1 * (a1 + 1) / 2 + a0