"""Implement EnforceGCContent."""
import numpy as np
import re
from ..biotools import gc_content, group_nearby_segments
from ..Location import Location
from ..Specification import Specification, SpecEvaluation
[docs]class EnforceGCContent(Specification):
    """Specification on the local or global proportion of G/C nucleotides.
    Shorthand for annotations: "gc".
    Examples
    --------
    >>> # Enforce global GC content between 40 and 70 percent.
    >>> Specification = GCContentSpecification(0.4, 0.7)
    >>> # Enforce 30-80 percent local GC content over 50-nucleotides windows
    >>> Specification = GCContentSpecification(0.3, 0.8, window=50)
    Parameters
    ----------
    mini
      Minimal proportion of G-C (e.g. ``0.35``)
    maxi
      Maximal proportion of G-C (e.g. ``0.75``)
    target
      Target proportion of GC (e.g. ``0.4``), which can be used instead of
      ``mini`` and ``maxi`` when using the specification as an optimization
      objective.
    window
      Length of the sliding window, in nucleotides, for local GC content.
      If not provided, the global GC content of the whole sequence is
      considered
    location
      Location objet indicating that the Specification only applies to a
      subsegment of the sequence. Make sure it is bigger than ``window``
      if both parameters are provided
    """
    best_possible_score = 0
    locations_span = 50  # The resolution will use locations size
    shorthand_name = "gc"
    def __init__(
        self,
        mini=0,
        maxi=1.0,
        target=None,
        window=None,
        location=None,
        boost=1.0,
    ):
        """Initialize."""
        if isinstance(mini, str):
            mini, maxi, target, _window = self.string_to_parameters(mini)
            if _window is not None:
                window = _window
        if target is not None:
            mini = maxi = target
        self.target = target
        self.mini = mini
        self.maxi = maxi
        self.window = window
        location = Location.from_data(location)
        if location is not None and (location.strand == -1):
            location = Location(location.start, location.end, 1)
        self.location = location
        self.boost = boost
    @staticmethod
    def string_to_parameters(pattern):
        target = None
        gc_pattern = r"(\d+)(|-(\d+))%($|/(\d+)bp)"
        match = re.match(gc_pattern, pattern)
        mini, _, maxi, _, window = match.groups()
        if maxi is None:
            target = mini
            mini = None
        mini, maxi, target = [
            None if e is None else int(e) / 100.0 for e in (mini, maxi, target)
        ]
        window = None if window is None else int(window)
        return mini, maxi, target, window
    def initialized_on_problem(self, problem, role=None):
        return self._copy_with_full_span_if_no_location(problem)
    def evaluate(self, problem):
        """Return the sum of breaches extent for all windowed breaches."""
        wstart, wend = self.location.start, self.location.end
        sequence = self.location.extract_sequence(problem.sequence)
        gc = gc_content(sequence, window_size=self.window)
        breaches = np.maximum(0, self.mini - gc) + np.maximum(
            0, gc - self.maxi
        )
        score = -breaches.sum()
        breaches_starts = wstart + (breaches > 0).nonzero()[0]
        if len(breaches_starts) == 0:
            breaches_locations = []
        elif len(breaches_starts) == 1:
            if self.window is not None:
                start = breaches_starts[0]
                breaches_locations = [[start, start + self.window]]
            else:
                breaches_locations = [[wstart, wend]]
        else:
            segments = [(bs, bs + self.window) for bs in breaches_starts]
            groups = group_nearby_segments(
                segments, max_start_spread=max(1, self.locations_span)
            )
            breaches_locations = [
                (group[0][0], group[-1][-1]) for group in groups
            ]
        if breaches_locations == []:
            message = "Passed !"
        else:
            breaches_locations = [Location(*loc) for loc in breaches_locations]
            message = "Out of bound on segments " + ", ".join(
                [str(l) for l in breaches_locations]
            )
        return SpecEvaluation(
            self, problem, score, locations=breaches_locations, message=message
        )
    def localized(self, location, problem=None, with_righthand=True):
        """Localize the GC content evaluation.
        For a location, the GC content evaluation will be restricted
        to [start - window, end + window]
        """
        # if self.location is not None:
        if self.window is None:
            # No specific location policy at the moment for non-windowed GC
            # content. The same specification is returned.
            # NOTE: this could be refined by computing
            # how much the local bounds should be in order for the global
            # sequence not get out of bound. This would be faster but much
            # more complicated.
            return self
        new_location = self.location.overlap_region(location)
        if new_location is None:
            return None
        else:
            extension = 0 if self.window is None else self.window - 1
            extended_location = location.extended(
                extension, right=with_righthand
            )
            new_location = self.location.overlap_region(extended_location)
        return self.copy_with_changes(location=new_location)
    def label_parameters(self):
        show_mini = self.mini is not None and (self.target is None)
        show_maxi = self.maxi is not None and (self.target is None)
        show_target = not (show_mini or show_maxi)
        show_window = self.window is not None
        return (
            show_mini * [("mini", "%.2f" % self.mini)]
            + show_maxi * [("maxi", "%.2f" % self.maxi)]
            + show_target
            * [("target", ("%.2f" % self.target) if self.target else "")]
            + show_window
            * [("window", ("%d" % self.window) if self.window else "")]
        )
    def short_label(self):
        if self.target is not None:
            result = "%d%% GC" % (np.round(100 * self.target))
        else:
            result = "%d-%d%% GC" % (
                np.round(100 * self.mini),
                np.round(100 * self.maxi),
            )
        if self.window is not None:
            result += "/%dbp" % self.window
        return result
    
    def breach_label(self):
        if self.target is not None:
            result = "GC != %d%% " % (np.round(100 * self.target))
        else:
            result = "GC outside %d-%d%%" % (
                np.round(100 * self.mini),
                np.round(100 * self.maxi),
            )
        if self.window is not None:
            result += "/%dbp" % self.window
        return result