Source code for hail.representation.pedigree

from hail.java import *
from hail.typecheck import *

class Trio(object):
    """Class containing information about nuclear family relatedness and sex.

    :param str proband: Sample ID of proband.

    :param fam: Family ID.
    :type fam: str or None

    :param father: Sample ID of father.
    :type father: str or None

    :param mother: Sample ID of mother.
    :type mother: str or None

    :param is_female: Sex of proband.
    :type is_female: bool or None
    """

    @handle_py4j
    @typecheck_method(proband=strlike,
                      fam=nullable(strlike),
                      father=nullable(strlike),
                      mother=nullable(strlike),
                      is_female=nullable(bool))
    def __init__(self, proband, fam=None, father=None, mother=None, is_female=None):
        # Translate the tri-state Python flag (True / False / None) into the
        # optional Sex value expected by the JVM-side BaseTrio.
        jobject = Env.hail().variant.Sex
        if is_female is not None:
            jsex = jsome(jobject.Female()) if is_female else jsome(jobject.Male())
        else:
            jsex = jnone()

        self._jrep = Env.hail().methods.BaseTrio(proband, joption(fam), joption(father), joption(mother), jsex)

        # Cache the Python-side values so the properties below do not need a
        # round trip to the JVM for trios built from Python.
        self._fam = fam
        self._proband = proband
        self._father = father
        self._mother = mother
        self._is_female = is_female

    @classmethod
    def _from_java(cls, jrep):
        """Wrap an existing JVM trio object without calling ``__init__``.

        Only ``_jrep`` is set; every other field is fetched lazily from the
        JVM object the first time the matching property is read.
        """
        trio = Trio.__new__(cls)
        trio._jrep = jrep
        return trio

    def __repr__(self):
        return 'Trio(proband=%s, fam=%s, father=%s, mother=%s, is_female=%s)' % (
            repr(self.proband), repr(self.fam), repr(self.father),
            repr(self.mother), repr(self.is_female))

    def __str__(self):
        return 'Trio(proband=%s, fam=%s, father=%s, mother=%s, is_female=%s)' % (
            str(self.proband), str(self.fam), str(self.father),
            str(self.mother), str(self.is_female))

    def __eq__(self, other):
        """Two trios are equal when their underlying JVM objects compare equal."""
        if not isinstance(other, Trio):
            return False
        return self._jrep == other._jrep

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; define it explicitly
        # so that ``a != b`` is always the negation of ``a == b``.
        return not self == other

    @handle_py4j
    def __hash__(self):
        return self._jrep.hashCode()

    def _reported_is_female(self):
        """Return the cached tri-state sex flag, loading it from the JVM if needed.

        :return: True for a reported female, False for a reported male,
            None when no sex is defined.
        :rtype: bool or None
        """
        if not hasattr(self, '_is_female'):
            # The original code queried isFemale() twice, so a male proband
            # was indistinguishable from a proband with no sex defined.
            if self._jrep.isFemale():
                self._is_female = True
            elif self._jrep.isMale():
                self._is_female = False
            else:
                self._is_female = None
        return self._is_female

    @property
    @handle_py4j
    def proband(self):
        """ID of proband in trio, never missing.

        :rtype: str
        """
        if not hasattr(self, '_proband'):
            self._proband = self._jrep.kid()
        return self._proband

    @property
    @handle_py4j
    def father(self):
        """ID of father in trio, may be missing.

        :rtype: str or None
        """
        if not hasattr(self, '_father'):
            self._father = from_option(self._jrep.dad())
        return self._father

    @property
    @handle_py4j
    def mother(self):
        """ID of mother in trio, may be missing.

        :rtype: str or None
        """
        if not hasattr(self, '_mother'):
            self._mother = from_option(self._jrep.mom())
        return self._mother

    @property
    @handle_py4j
    def fam(self):
        """Family ID.

        :rtype: str or None
        """
        if not hasattr(self, '_fam'):
            self._fam = from_option(self._jrep.fam())
        return self._fam

    @property
    @handle_py4j
    def is_male(self):
        """Returns True if the proband is a reported male, False if reported
        female, and None if no sex is defined.

        :rtype: bool or None
        """
        is_female = self._reported_is_female()
        if is_female is None:
            return None
        return is_female is False

    @property
    @handle_py4j
    def is_female(self):
        """Returns True if the proband is a reported female, False if reported
        male, and None if no sex is defined.

        :rtype: bool or None
        """
        is_female = self._reported_is_female()
        if is_female is None:
            return None
        return is_female is True

    @handle_py4j
    def is_complete(self):
        """Returns True if the trio has a defined mother, father, and sex.

        The considered fields are ``mother``, ``father``, and ``sex``.
        Recall that ``proband`` may never be missing. The ``fam`` field
        may be missing in a complete trio.

        The answer is delegated to the JVM object and cached after the
        first call.

        :rtype: bool
        """
        if not hasattr(self, '_complete'):
            self._complete = self._jrep.isComplete()
        return self._complete
class Pedigree(object):
    """Class containing a list of trios, with extra functionality.

    :param trios: list of trio objects to include in pedigree
    :type trios: list of :class:`.Trio`
    """

    @handle_py4j
    def __init__(self, trios):
        # Snapshot the input: this lets any iterable (including a generator)
        # be passed, and keeps the cached Python list in sync with the JVM
        # object even if the caller later mutates their own list.
        trios = list(trios)
        self._jrep = Env.hail().methods.Pedigree(jindexed_seq([t._jrep for t in trios]))
        self._trios = trios

    @classmethod
    def _from_java(cls, jrep):
        """Wrap an existing JVM pedigree object without calling ``__init__``.

        ``_trios`` is left as None and is populated lazily by :attr:`trios`.
        """
        ped = Pedigree.__new__(cls)
        ped._jrep = jrep
        ped._trios = None
        return ped

    def __eq__(self, other):
        """Two pedigrees are equal when their underlying JVM objects compare equal."""
        if not isinstance(other, Pedigree):
            return False
        return self._jrep == other._jrep

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; define it explicitly
        # so that ``a != b`` is always the negation of ``a == b``.
        return not self == other

    @handle_py4j
    def __hash__(self):
        return self._jrep.hashCode()

    @staticmethod
    @handle_py4j
    @typecheck(fam_path=strlike,
               delimiter=strlike)
    def read(fam_path, delimiter='\\s+'):
        """Read a .fam file and return a pedigree object.

        **Examples**

        >>> ped = Pedigree.read('data/test.fam')

        **Notes**

        This method reads a `PLINK .fam file <https://www.cog-genomics.org/plink2/formats#fam>`_.

        Hail expects a file in the same spec as PLINK outlines.

        :param str fam_path: path to .fam file.

        :param str delimiter: Field delimiter.

        :rtype: :class:`.Pedigree`
        """
        jrep = Env.hail().methods.Pedigree.read(fam_path, Env.hc()._jhc.hadoopConf(), delimiter)
        return Pedigree._from_java(jrep)

    @property
    @handle_py4j
    def trios(self):
        """List of trio objects in this pedigree.

        :rtype: list of :class:`.Trio`
        """
        # Compare against None rather than truthiness: an empty pedigree is a
        # valid cached value and must not trigger a reload on every access.
        if self._trios is None:
            self._trios = [Trio._from_java(t) for t in jiterable_to_list(self._jrep.trios())]
        return self._trios

    def complete_trios(self):
        """List of trio objects that have a defined father, mother, and sex.

        :rtype: list of :class:`.Trio`
        """
        # A list comprehension returns a real list on both Python 2 and 3,
        # whereas ``filter`` is lazy on Python 3.
        return [t for t in self.trios if t.is_complete()]

    @handle_py4j
    @typecheck_method(samples=listof(strlike))
    def filter_to(self, samples):
        """Filter the pedigree to a given list of sample IDs.

        **Notes**

        For any trio, the following steps will be applied:

         - If the proband is not in the list of samples provided, the trio is removed.
         - If the father is not in the list of samples provided, the father is set to ``None``.
         - If the mother is not in the list of samples provided, the mother is set to ``None``.

        :param samples: list of sample IDs to keep
        :type samples: list of str

        :rtype: :class:`.Pedigree`
        """
        return Pedigree._from_java(self._jrep.filterTo(jset(samples)))

    @handle_py4j
    @typecheck_method(path=strlike)
    def write(self, path):
        """Write a .fam file to the given path.

        **Examples**

        >>> ped = Pedigree.read('data/test.fam')
        >>> ped.write('out.fam')

        **Notes**

        This method writes a `PLINK .fam file <https://www.cog-genomics.org/plink2/formats#fam>`_.

        .. caution::

            Phenotype information is not preserved in the Pedigree data structure in Hail.
            Reading and writing a PLINK .fam file will result in loss of this information.
            Use the key table method :py:meth:`~hail.KeyTable.import_fam` to manipulate this
            information.

        :param path: output path
        :type path: str
        """
        self._jrep.write(path, Env.hc()._jhc.hadoopConf())