"""Misc. plotting and reporting methods, some of which are really arbitrary.
Here is a typical example of use:
>>> import dnachisel.reports.constraint_reports as cr
>>> dataframe = cr.constraints_breaches_dataframe(constraints, sequences)
>>> dataframe.to_excel("output_breaches.xlsx")
>>> records = cr.records_from_breaches_dataframe(dataframe, sequences)
>>> cr.breaches_records_to_pdf(records, 'output_breaches_plots.pdf')
"""
from copy import deepcopy
import re
from io import BytesIO
import proglog
from ...biotools import sequence_to_biopython_record, annotate_record
from ...builtin_specifications import (
    EnforceGCContent,
    AvoidPattern,
    AvoidHairpins,
)
from ..colors_cycle import colors_cycle
from .GraphicTranslator import GraphicTranslator
# Matplotlib is treated as optional: a failed import is recorded in
# MPL_AVAILABLE instead of breaking the import of this module.
# NOTE(review): plt and PdfPages are only bound when the import succeeds, and
# nothing visible in this file checks MPL_AVAILABLE before using them --
# confirm whether callers are expected to check the flag themselves.
try:
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages
    MPL_AVAILABLE = True
except ImportError:
    MPL_AVAILABLE = False
def _sequences_to_new_records(sequences):
    """Build a fresh list of records from any supported ``sequences`` input.

    Supported inputs are

    - a list of ``("name", "sequence")`` pairs,
    - a dict ``{"name": "sequence"}``,
    - a list of records (any object with an ``id`` attribute); these are
      deep-copied so the objects passed by the caller are left untouched.
    """

    def to_record(entry):
        # Anything exposing an ``id`` is considered to be a record already.
        if hasattr(entry, "id"):
            return deepcopy(entry)
        name, sequence = entry
        return sequence_to_biopython_record(sequence, id=name, name=name)

    if isinstance(sequences, dict):
        entries = list(sequences.items())
    else:
        entries = sequences
    return [to_record(entry) for entry in entries]
def _parse_location(location_string):
    """Parse a location string such as ``"235-350(+)"``.

    Parameters
    ----------
    location_string
      A string starting with ``start-end``, optionally followed by a strand
      marker ``(+)`` or ``(-)``. Leading and trailing whitespace is ignored.

    Returns
    -------
    (start, end, strand)
      ``start`` and ``end`` as integers, and ``strand`` equal to ``-1`` when
      the marker is ``(-)`` and ``1`` otherwise (including when no marker is
      given).

    Raises
    ------
    ValueError
      If the string does not start with a ``start-end`` pattern.
    """
    # The "+" must be escaped: an unescaped "\(+\)" means "one or more opening
    # parentheses followed by a closing one" and can never match "(+)".
    location_regex = r"(\d+)-(\d+)(\(\+\)|\(-\)|)"
    match = re.match(location_regex, location_string.strip())
    if match is None:
        raise ValueError(
            "Could not parse location %r (expected e.g. '235-350(+)')"
            % (location_string,)
        )
    start, end, strand = match.groups()
    return int(start), int(end), -1 if strand == "(-)" else 1
def records_from_breaches_dataframe(dataframe, sequences):
    """Generate records with annotations indicating constraints breaches.

    Parameters
    ----------
    dataframe
      A breaches dataframe returned by ``constraints_breaches_dataframe``.
      Each non-empty cell is read as a comma-separated string of locations
      such as ``"235-350(+)"``. Rows are paired with sequences by position.

    sequences
      Either a list [("name", "sequence")...] or a dict {"name": "sequence"}
      or a list of biopython records whose id is the sequence name.

    Returns
    -------
    records
      A list of new records (records given as input are deep-copied, not
      modified), annotated via ``annotate_record`` once per breach location,
      with the dataframe column name as label and one color per column.
    """
    records = _sequences_to_new_records(sequences)
    # Remove breach annotations already present on the records (they carry
    # the "is_a_breach" qualifier set below) so they are not duplicated.
    for record in records:
        record.features = [
            f
            for f in record.features
            if not f.qualifiers.get("is_a_breach", False)
        ]
    colors_cycle_iterator = colors_cycle()
    columns_colors = {
        c: next(colors_cycle_iterator) for c in dataframe.columns
    }
    for rec, (_, row) in zip(records, dataframe.iterrows()):
        for column in dataframe.columns:
            locations = row[column]
            # An empty cell means "no breach". Besides "", it can show up as
            # NaN/None (e.g. when the dataframe was reloaded from a file),
            # which has no ``split`` method, so skip any non-string value.
            if not isinstance(locations, str) or not locations.strip():
                continue
            color = columns_colors[column]
            for location in locations.split(","):
                annotate_record(
                    rec,
                    location=_parse_location(location),
                    label=column,
                    color=color,
                    ApEinfo_fwdcolor=color,
                    ApEinfo_revcolor=color,
                    is_a_breach=True,
                )
    return records
def plot_breaches_record(record, ax=None, figure_width=10):
    """Plot a record with ``GraphicTranslator`` and return the resulting ax.

    Parameters
    ----------
    record
      The record to plot. Its ``id`` is used as the plot title.

    ax
      Forwarded to the graphic record's ``plot`` method (None by default).

    figure_width
      Forwarded to the graphic record's ``plot`` method.

    Returns
    -------
    ax
      The ax returned by the ``plot`` call, with a bold left-aligned title
      and its upper y-limit raised by 1.
    """
    graphic_record = GraphicTranslator().translate_record(record)
    plot_ax, _ = graphic_record.plot(
        ax=ax, figure_width=figure_width, strand_in_label_threshold=7
    )
    plot_ax.set_title(record.id, loc="left", fontweight="bold")
    # Raise the top of the y-axis by one unit (presumably to leave some
    # headroom between the annotations and the title).
    current_top = plot_ax.get_ylim()[1]
    plot_ax.set_ylim(top=current_top + 1)
    return plot_ax
def breaches_records_to_pdf(
    breaches_records, pdf_path=None, figure_width=10, logger="bar"
):
    """Plots figures of the breaches annotated in the records into a PDF file.

    One page is written per record, in the order of ``breaches_records``.

    Parameters
    ----------
    breaches_records
      A list of records annotated with breaches, as returned by
      ``records_from_breaches_dataframe``.

    pdf_path
      Either the path to a PDF, or a file handle (open in wb mode) or None
      for this method to return binary PDF data.

    figure_width
      Figure width forwarded to ``plot_breaches_record`` for every record.

    logger
      Either "bar" for a progress bar, None for no logging, or any Proglog
      logger. The bar name is "sequence".

    Returns
    -------
    pdf_data
      The binary PDF data when ``pdf_path`` is None, otherwise None.
    """
    pdf_io = BytesIO() if pdf_path is None else pdf_path
    logger = proglog.default_bar_logger(logger, min_time_interval=0.2)
    with PdfPages(pdf_io) as pdf:
        for record in logger.iter_bar(sequence=breaches_records):
            ax = plot_breaches_record(record, figure_width=figure_width)
            try:
                pdf.savefig(ax.figure, bbox_inches="tight")
            finally:
                # Close the figure even if saving fails, so that figures do
                # not accumulate in memory across records.
                plt.close(ax.figure)
    if pdf_path is None:
        return pdf_io.getvalue()
    return None
# Example list of specifications: ten AvoidPattern entries (one per pattern
# name below, in this order), followed by one AvoidHairpins and one
# EnforceGCContent specification.
EXAMPLE_MANUFACTURING_CONSTRAINTS = [
    AvoidPattern(pattern_name)
    for pattern_name in (
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "SapI_site",
        "9xA",
        "9xT",
        "6xG",
        "6xC",
        "5x3mer",
        "9x2mer",
    )
] + [
    AvoidHairpins(stem_size=20, hairpin_window=200),
    EnforceGCContent(mini=0.3, maxi=0.7, window=100),
]