from ..biotools import windows_overlap
import itertools
import numpy as np
[docs]class MutationChoice:
"""Represent a segment of a sequence with several possible variants.
Parameters
----------
segment
A pair (start, end) indicating the range of nucleotides concerned. We
are applying Python range, so
variants
A set of sequence variants, at the given position
Examples
--------
>>> choice = MutationChoice((70, 73), {})
"""
__slots__ = ["segment", "start", "end", "variants", "is_any_nucleotide"]
def __init__(self, segment, variants, is_any_nucleotide=False):
if isinstance(segment, int):
segment = (segment, segment + 1)
self.segment = segment
self.start, self.end = segment
self.variants = variants
self.is_any_nucleotide = is_any_nucleotide
# self.possible_subsequences = set(m.subsequence for m in mutations)
[docs] def random_variant(self, sequence):
"""Return one of the variants, randomly."""
subsequence = sequence[self.start : self.end]
variants = [v for v in self.variants if v != subsequence]
# the sorting of variants seems essential to ensure reproducibility
# between sessions.
# it does not slow down the global algorithm (or less than 3%)
variants = sorted(variants)
return variants[np.random.randint(len(variants))]
[docs] def merge_with(self, others):
"""Merge this mutation choice with others to form a single choice
Examples:
---------
>>> ((2, 5), {'ATT', 'ATA'})
merged with:
>>> [
>>> ((0, 3), {'GTA', 'GCT', 'GTT'}),
>>> ((3, 4), {'A', 'T', 'G', 'C'}),
>>> ((4, 7), {'ATG', 'ACC', 'CTG'})
>>> ]
returns the only choices on the full interval which are compatible with
at least one choice in each of the MutationChoices
>>> (0, 7), {'GTATACC', 'GTATATG'}
"""
others = sorted(others, key=lambda o: o.start)
others_start = others[0].start
final_segment = others_start, others[-1].end
final_variants = set()
for candidate in self.variants:
slots = []
for other in others:
istart, iend = windows_overlap(other.segment, self.segment)
slot = []
for variant in other.variants:
subseq = variant[istart - other.start : iend - other.start]
subcandidate = candidate[
istart - self.start : iend - self.start
]
if subseq == subcandidate:
slot.append(variant)
slots.append(slot)
for subseqs in itertools.product(*slots):
seq = "".join(subseqs)
matching_seq = seq[
self.start - others_start : self.end - others_start
]
if matching_seq == candidate:
final_variants.add(seq)
return MutationChoice(segment=final_segment, variants=final_variants)
def __repr__(self):
"""Represent."""
subsequences = "-".join(self.variants)
return "MutChoice(%d-%d %s)" % (self.start, self.end, subsequences)
def __str__(self):
"""Represent."""
subsequences = "-".join(self.variants)
return "MutChoice(%d-%d %s)" % (self.start, self.end, subsequences)