Source code for hail.ldMatrix

from hail.java import *
from hail.representation import Variant
from hail.typecheck import *

class LDMatrix(object):
    """
    Represents a symmetric matrix encoding the Pearson correlation between
    each pair of variants in the accompanying variant list.
    """

    def __init__(self, jldm):
        # Handle to the underlying Java LD matrix object; every method below
        # delegates to it.
        self._jldm = jldm

    def variant_list(self):
        """
        Gets the list of variants. The (i, j) entry of the matrix encodes the
        Pearson correlation between the ith and jth variants.

        :return: List of variants.
        :rtype: list of Variant
        """
        jvars = self._jldm.variants()
        return [Variant._from_java(jrep) for jrep in jvars]

    def matrix(self):
        """
        Gets the distributed matrix backing this LD matrix.

        :return: Matrix of Pearson correlation values.
        :rtype: `IndexedRowMatrix <https://spark.apache.org/docs/latest/api/python/pyspark.mllib.html#pyspark.mllib.linalg.distributed.IndexedRowMatrix>`__
        """
        # Imported lazily so that pyspark is only needed when this is called.
        from pyspark.mllib.linalg.distributed import IndexedRowMatrix

        return IndexedRowMatrix(self._jldm.matrix())

    def to_local_matrix(self):
        """
        Converts the LD matrix to a local Spark matrix.

        .. caution::

            Only call this method when the LD matrix is small enough to fit in
            local memory on the driver.

        :return: Matrix of Pearson correlation values.
        :rtype: `Matrix <https://spark.apache.org/docs/2.1.0/api/python/pyspark.mllib.html#pyspark.mllib.linalg.Matrix>`__
        """
        from pyspark.mllib.linalg import DenseMatrix

        j_local_mat = self._jldm.toLocalMatrix()

        # The flat data array is handed to DenseMatrix below as a
        # non-transposed matrix with no offset or padding, so these layout
        # invariants must hold for the entries to land in the right cells.
        assert j_local_mat.majorStride() == j_local_mat.rows(), \
            'local matrix must be densely packed (major stride == number of rows)'
        assert j_local_mat.offset() == 0, \
            'local matrix data must start at offset 0'
        assert not j_local_mat.isTranspose(), \
            'local matrix must not be transposed'

        return DenseMatrix(j_local_mat.rows(),
                           j_local_mat.cols(),
                           list(j_local_mat.data()),
                           False)

    def write(self, path):
        """
        Writes the LD matrix to a file.

        **Examples**

        Write an LD matrix to a file.

        >>> vds.ld_matrix().write('output/ld_matrix')

        :param path: the path to which to write the LD matrix
        :type path: str
        """
        self._jldm.write(path)

    @staticmethod
    def read(path):
        """
        Reads the LD matrix from a file.

        **Examples**

        Read an LD matrix from a file.

        >>> ld_matrix = LDMatrix.read('data/ld_matrix')

        :param path: the path from which to read the LD matrix
        :type path: str

        :return: the LD matrix stored at ``path``
        :rtype: LDMatrix
        """
        jldm = Env.hail().methods.LDMatrix.read(Env.hc()._jhc, path)
        return LDMatrix(jldm)

    @typecheck_method(path=strlike,
                      column_delimiter=strlike,
                      header=nullable(strlike),
                      parallel_write=bool,
                      entries=enumeration('full', 'lower', 'strict_lower', 'upper', 'strict_upper'))
    def export(self, path, column_delimiter, header=None, parallel_write=False, entries='full'):
        r"""Exports this matrix as a delimited text file.

        **Examples**

        Write a full LD matrix as a tab-separated file:

        >>> vds.ld_matrix().export('output/ld_matrix.tsv', column_delimiter='\t')

        Write a full LD matrix as a comma-separated file with the variant list
        as a header:

        >>> ldm = vds.ld_matrix()
        >>> ldm.export('output/ld_matrix.tsv',
        ...            column_delimiter=',',
        ...            header=','.join([str(v) for v in ldm.variant_list()]))

        Write a full LD matrix as a folder of comma-separated file shards:

        >>> ldm = vds.ld_matrix()
        >>> ldm.export('output/ld_matrix.tsv',
        ...            column_delimiter=',',
        ...            header=None,
        ...            parallel_write=True)

        Write the upper-triangle with the diagonal as a comma-separated file:

        >>> ldm = vds.ld_matrix()
        >>> ldm.export('output/ld_matrix.tsv',
        ...            column_delimiter=',',
        ...            entries='upper')

        **Notes**

        A matrix cannot be exported if it has more than ``2^31 - 1`` columns.

        A full, 3x3 LD matrix written as a comma-separated file looks like
        this:

        .. code-block:: text

            1.0,0.8,0.7
            0.8,1.0,0.3
            0.7,0.3,1.0

        The strict lower triangle:

        .. code-block:: text

            0.8
            0.7,0.3

        The lower triangle:

        .. code-block:: text

            1.0
            0.8,1.0
            0.7,0.3,1.0

        The strict upper triangle:

        .. code-block:: text

            0.8,0.7
            0.3

        The upper triangle:

        .. code-block:: text

            1.0,0.8,0.7
            1.0,0.3
            1.0

        :param path: the path at which to write the LD matrix
        :type path: str

        :param column_delimiter: the column delimiter
        :type column_delimiter: str

        :param header: a string written before the first row of the matrix
        :type header: str or None

        :param parallel_write: if false, a single file is produced, otherwise
            a folder of file shards is produced; if set to false the export
            will be slower
        :type parallel_write: bool

        :param entries: describes what portion of the entries should be
            printed, see the notes for a detailed description
        :type entries: str
        """
        # NOTE: the docstring above is a raw string so that '\t' in the first
        # example stays a backslash escape instead of becoming a hard tab.

        # Convert the optional header once; every export variant takes it.
        j_header = joption(header)

        if entries == 'full':
            self._jldm.export(path, column_delimiter, j_header, parallel_write)
        elif entries == 'lower':
            self._jldm.exportLowerTriangle(path, column_delimiter, j_header, parallel_write)
        elif entries == 'strict_lower':
            self._jldm.exportStrictLowerTriangle(path, column_delimiter, j_header, parallel_write)
        elif entries == 'upper':
            self._jldm.exportUpperTriangle(path, column_delimiter, j_header, parallel_write)
        else:
            # Only 'strict_upper' remains; the typecheck decorator rejects any
            # value outside the enumeration before this point.
            self._jldm.exportStrictUpperTriangle(path, column_delimiter, j_header, parallel_write)