Source code for dnachisel.builtin_specifications.EnforceTranslation

"Implement EnforceTranslation."


from ..Specification import SpecEvaluation
from ..biotools import (
    translate,
    reverse_complement,
    get_backtranslation_table,
)
from ..Location import Location
from .CodonSpecification import CodonSpecification


[docs]class EnforceTranslation(CodonSpecification): """Enforce a specific amino-acid sequence translation. This class enforces the standard translation, but it is also possible to change the class' `codons_sequences` and `codons_translations` dictionaries for more exotic kind of translations Shorthand for annotations: "cds". Parameters ----------- location Either a DnaChisel Location or a tuple of the form (start, end, strand) or just (start, end), with strand defaulting to +1, indicating the position of the gene to codon-optimize. If not provided, the whole sequence is considered as the gene. The location should have a length that is a multiple of 3. The location strand is either 1 if the gene is encoded on the (+) strand, or -1 for antisense. genetic_table Either "Standard", "Bacterial", or any other Biopython genetic table name (see dnachisel.biotools.CODON_TABLE_NAMES for a list of accepted names). start_codon Signals that the first codon is a start codon and provides a policy for changing it. It is very important that this parameter be set when dealing with full coding sequences in organisms with different start codons, e.g. GTG in bacteria. If it is not set, then a GTG (start codon or Valine) could be changed into GTA (also valine but NOT a start codon). Can be False (the first codon is not considered a start codon, but can still naturally code for methionine), ``keep`` (freezes the original sub-sequence at this location, or a codon e.g. ``ATG`` or ``GTG`` or ``["ATG", "GTG"]``. translation String representing the protein sequence that the DNA segment should translate to, eg. "MKY...LL*" ("*" stands for stop codon). Can be omitted if the sequence initially encodes the right sequence. boost Score multiplicator (=weight) for when the specification is used as an optimization objective alongside competing objectives. """ best_possible_score = 0 enforced_by_nucleotide_restrictions = True shorthand_name = "cds" default_genetic_table = "Standard" def __init__( self, genetic_table="default", start_codon=None, translation=None, location=None, boost=1.0, ): """Initialize.""" self.translation = translation location = Location.from_data(location) if (location is not None) and (location.strand not in [-1, 1]): location = Location(location.start, location.end, 1) self.set_location(location) self.start_codon = start_codon self.boost = boost if genetic_table == "default": genetic_table = self.default_genetic_table self.genetic_table = genetic_table self.initialize_translation_from_problem = translation is None self.initialize_location_from_problem = location is None self.backtranslation_table = get_backtranslation_table(genetic_table) def set_location(self, location): """Check that the location length is valid before setting it.""" if location is not None: if len(location) % 3: raise ValueError( "Location length in Codon Specifications should be a 3x. " "Location %s has length %d" % (location, len(location)) ) if (self.translation is not None) and ( len(location) != 3 * len(self.translation) ): raise ValueError( ( "Window size (%d bp) incompatible with translation " "(%d aa)" ) % (len(location), len(self.translation)) ) self.location = location def initialized_on_problem(self, problem, role): """Get translation from the sequence if it is not already set.""" result = self._copy_with_full_span_if_no_location(problem) if result.translation is None: subsequence = result.location.extract_sequence(problem.sequence) translation = translate( subsequence, table=result.genetic_table, assume_start_codon=result.start_codon is not None, ) result = result.copy_with_changes(translation=translation) if len(result.location) != 3 * len(result.translation): raise ValueError( ( "Window size (%d bp) incompatible with translation " "(%d aa)" ) % (len(result.location), len(result.translation)) ) if (result.start_codon is not None) and result.translation[0] != "M": raise ValueError( ( "Spec. %s specificies a start_codon parameter, but the " "translation at this location does not start with Met. " "Maybe you should provide a 'translation' parameter or " "set up a different Genetic Code." ) % result.label(use_short_form=True) ) return result def evaluate(self, problem): """Score is the number of wrong-translation codons.""" # Note: this method is actually very little used as this specification # class sets the enforced_by_nucleotide_restrictions attribute. location = ( self.location if self.location is not None else Location(0, len(problem.sequence), 1) ) subsequence = location.extract_sequence(problem.sequence) translation = translate( subsequence, table=self.genetic_table, assume_start_codon=self.start_codon is not None, ) errors_locations = [ self.codon_index_to_location(index) for (index, amino_acid) in enumerate(translation) if amino_acid != self.translation[index] ] return SpecEvaluation( self, problem, score=-len(errors_locations), locations=errors_locations, message="All OK." if len(errors_locations) == 0 else "Wrong translation at locations %s" % errors_locations, ) def localized_on_window(self, new_location, start_codon, end_codon): new_translation = self.translation[start_codon:end_codon] if self.location.strand == -1: location_is_at_start = new_location.end >= self.location.end else: location_is_at_start = new_location.start <= self.location.start return self.__class__( location=new_location, translation=new_translation, boost=self.boost, genetic_table=self.genetic_table, start_codon=self.start_codon if location_is_at_start else None # has_start_codon=self.has_start_codon and location_is_at_start, ) def restrict_nucleotides(self, sequence, location=None): if self.backtranslation_table is None: return [] def get_first_codon_choices(first_codon): if self.start_codon is None: return self.backtranslation_table[self.translation[0]] elif isinstance(self.start_codon, (list, tuple)): return list(self.start_codon) elif self.start_codon == "keep": return [first_codon] else: return [self.start_codon] # "ATG" first_codon_location = self.codon_index_to_location(0) first_codon = first_codon_location.extract_sequence(sequence) choices = [ (first_codon_location, get_first_codon_choices(first_codon)) ] + [ (self.codon_index_to_location(i), self.backtranslation_table[aa]) for i, aa in list(enumerate(self.translation))[1:] ] def standardize_choice(choice): location, choices_list = choice if location.strand == -1: choices_list = [reverse_complement(c) for c in choices_list] return (location.to_tuple()[:2], choices_list) return sorted([standardize_choice(choice) for choice in choices]) def __repr__(self): return "EnforceTranslation(%s)" % str(self.location) def __str__(self): return "EnforceTranslation(%s)" % str(self.location) def short_label(self): return "cds" def breach_label(self): return "protein sequence changed"