Source code for dnachisel.builtin_specifications.codon_optimization.AvoidRareCodons
"Implement AvoidRareCodons."
from ...Specification import SpecEvaluation
from ...biotools import reverse_complement
from .BaseCodonOptimizationClass import BaseCodonOptimizationClass
[docs]class AvoidRareCodons(BaseCodonOptimizationClass):
"""Avoid the use of codons with low frequency.
This can be seen as a "mild" form of codon optimization where only rare
codons (which slow down protein synthesis) are considered.
WARNING: Make sure to always use this specification with EnforceTranslation
to preserve the amino-acid sequence.
Shorthand for annotations: "no_rare_codons".
Parameters
-----------
min_frequency
Minimal frequency accepted for a given codon.
species
Name or TaxID of the species for which to optimize the sequence. A custom
codon_usage_table can be provided instead (or in addition, for species
names whose codon usage table cannot be imported).
codon_usage_table
Optional codon usage table of the species for which the sequence will be
codon-optimized, which can be provided instead of ``species``. A dict of
the form ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}`` giving the RSCU
table (relative usage of each codon). See parameter ``species`` above.
location
Either a DnaChisel Location or a tuple of the form (start, end, strand)
or just (start, end), with strand defaulting to +1, indicating the
position of the gene to codon-optimize. If not provided, the whole
sequence is considered as the gene. The location should have a length
that is a multiple of 3. The location strand is either 1 if the gene is
encoded on the (+) strand, or -1 for antisense.
boost
Score multiplicator (=weight) for when the specification is used as an
optimization objective alongside competing objectives.
"""
best_possible_score = 0
enforced_by_nucleotide_restrictions = True
shorthand_name = "no_rare_codons"
def __init__(
self,
min_frequency,
species=None,
codon_usage_table=None,
location=None,
boost=1.0,
):
"""Initialize."""
BaseCodonOptimizationClass.__init__(
self,
species=species,
codon_usage_table=codon_usage_table,
location=location,
boost=boost,
)
self.min_frequency = min_frequency
self.codons_frequencies = {
codon: freq
for aa, aa_data in self.codon_usage_table.items()
if len(aa) == 1
for codon, freq in aa_data.items()
}
self.rare_codons = sorted(
[
codon
for codon, frequency in self.codons_frequencies.items()
if frequency < min_frequency
]
)
self.nonrare_codons = sorted(
[
codon
for codon, frequency in self.codons_frequencies.items()
if frequency >= min_frequency
]
)
def evaluate(self, problem):
"""Score is the sum of (freq - min_frequency) for all rare codons."""
# Note: this method is actually very little used as this specification
# class sets the enforced_by_nucleotide_restrictions attribute.
codons = self.get_codons(problem)
rare_codons_indices = [
i for i, codon in enumerate(codons) if codon in self.rare_codons
]
locations = self.codons_indices_to_locations(rare_codons_indices)
score = (
0
if (len(locations) == 0)
else sum(
(self.codons_frequencies[codons[i]] - self.min_frequency)
for i in rare_codons_indices
)
)
return SpecEvaluation(
self,
problem,
score=score,
locations=locations,
message="All OK."
if len(locations) == 0
else "Rare codons at locations %s" % locations,
)
def restrict_nucleotides(self, sequence, location=None):
nonrare_codons = list(self.nonrare_codons)
if self.location.strand == -1:
nonrare_codons = sorted(
[reverse_complement(c) for c in nonrare_codons]
)
return [
((i, i + 3), nonrare_codons)
for i in range(self.location.start, self.location.end, 3)
]
def _params_string(self):
"""Parameters representation used in __repr__, __str__, etc."""
return "%d%%, %s" % (100 * self.min_frequency, str(self.species))
def __repr__(self):
return "AvoidRareCodons(%s)" % self._params_string()
def __str__(self):
return "AvoidRareCodons(%s)" % self._params_string()
def short_label(self):
return "no_rare_codons(%s)" % self._params_string()