# Source code for dnachisel.reports.constraints_reports.constraints_reports

"""Misc. plotting and reporting methods, some of which are really arbitrary.


Here is a typical example of use:

>>> import dnachisel.reports.constraints_reports as cr
>>> dataframe = cr.constraints_breaches_dataframe(constraints, sequences)
>>> dataframe.to_excel("output_breaches.xlsx")
>>> records = cr.records_from_breaches_dataframe(dataframe, sequences)
>>> cr.breaches_records_to_pdf(records, 'output_breaches_plots.pdf')
"""

from copy import deepcopy
import re
from io import BytesIO

import proglog

from ...biotools import sequence_to_biopython_record, annotate_record
from ...builtin_specifications import (
    EnforceGCContent,
    AvoidPattern,
    AvoidHairpins,
)
from ..colors_cycle import colors_cycle

from .GraphicTranslator import GraphicTranslator

try:
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages
    MPL_AVAILABLE = True
except ImportError:
    MPL_AVAILABLE = False

def _sequences_to_new_records(sequences):
    """Turn acceptable sequences input into a records list.

    Acceptable formats are
    - ('name', 'sequence')
    - {'name': 'sequence'}
    - [records] (will be deepcopied)
    """
    if isinstance(sequences, dict):
        sequences = list(sequences.items())
    records = []
    for seq in sequences:
        if hasattr(seq, "id"):
            records.append(deepcopy(seq))
        else:
            name, seq = seq
            records.append(
                sequence_to_biopython_record(seq, id=name, name=name)
            )
    return records


def _parse_location(location_string):
    """Parses locations like 235-350(+)"""
    location_regex = r"(\d+)-(\d+)(\(+\)|\(-\)|)"
    match = re.match(location_regex, location_string.strip())
    start, end, strand = match.groups()
    return int(start), int(end), -1 if strand == "(-)" else 1



def records_from_breaches_dataframe(dataframe, sequences):
    """Generate records with annotations indicating constraints breaches.

    Parameters
    ----------

    dataframe
      A breaches dataframe returned by ``constraints_breaches_dataframe``.
      Each non-empty cell is read as a comma-separated list of locations
      such as ``"235-350(+)"``.

    sequences
      Either a list [("name", "sequence")...] or a dict {"name": "sequence"}
      or a list of biopython records whose id is the sequence name.

    Returns
    -------

    records
      New records (the inputs are copied or converted, not modified), with
      one annotation per breach location, labelled after the dataframe
      column it came from and flagged with the qualifier ``is_a_breach``.
    """
    records = _sequences_to_new_records(sequences)
    # Remove annotations already flagged as breaches so they are not
    # duplicated by the ones added below.
    for record in records:
        record.features = [
            f
            for f in record.features
            if not f.qualifiers.get("is_a_breach", False)
        ]
    # One color per dataframe column, assigned in column order.
    colors_cycle_iterator = colors_cycle()
    columns_colors = {
        c: next(colors_cycle_iterator) for c in dataframe.columns
    }
    # Records and dataframe rows are paired by position, not by name.
    for rec, (_, row) in zip(records, dataframe.iterrows()):
        for column in dataframe.columns:
            locations = row[column]
            # Skip empty cells. Non-string cells are skipped as well
            # (presumably NaN when the dataframe was reloaded from a
            # spreadsheet) since they carry no location text to split.
            if not isinstance(locations, str) or not locations:
                continue
            for location in locations.split(","):
                annotate_record(
                    rec,
                    location=_parse_location(location),
                    label=column,
                    color=columns_colors[column],
                    ApEinfo_fwdcolor=columns_colors[column],
                    ApEinfo_revcolor=columns_colors[column],
                    is_a_breach=True,
                )
    return records


def plot_breaches_record(record, ax=None, figure_width=10):
    """Plot the annotations of a record and return the axes of the plot.

    Parameters
    ----------

    record
      The record to plot. It is translated with ``GraphicTranslator`` and
      its ``id`` is used as the plot title.

    ax
      Forwarded as-is to the graphic record's ``plot`` method (presumably
      ``None`` lets that method create its own axes -- to be confirmed
      against the plotting library).

    figure_width
      Forwarded as-is to the graphic record's ``plot`` method.

    Returns
    -------

    ax
      The axes object returned by the ``plot`` call.
    """
    graphic_record = GraphicTranslator().translate_record(record)
    plotted_ax, _ = graphic_record.plot(
        ax=ax, figure_width=figure_width, strand_in_label_threshold=7
    )
    plotted_ax.set_title(record.id, loc="left", fontweight="bold")
    # Raise the upper y-limit by one unit above its current value.
    upper_limit = plotted_ax.get_ylim()[1]
    plotted_ax.set_ylim(top=upper_limit + 1)
    return plotted_ax


def breaches_records_to_pdf(
    breaches_records, pdf_path=None, figure_width=10, logger="bar"
):
    """Plots figures of the breaches annotated in the records into a PDF file.

    Parameters
    ----------

    breaches_records
      A list of records annotated with breaches, as returned by
      ``records_from_breaches_dataframe``.

    pdf_path
      Either the path to a PDF, or a file handle (open in wb mode) or None
      for this method to return binary PDF data.

    figure_width
      Width passed to ``plot_breaches_record`` for every figure.

    logger
      Either "bar" for a progress bar, None for no logging, or any Proglog
      logger. The bar name is "sequence".

    Returns
    -------

    pdf_data
      The binary PDF data when ``pdf_path`` is None, otherwise None.

    Raises
    ------

    ImportError
      If Matplotlib could not be imported when this module was loaded.
    """
    # Fail with an explicit message rather than a NameError on PdfPages.
    if not MPL_AVAILABLE:
        raise ImportError(
            "breaches_records_to_pdf requires Matplotlib to be installed."
        )
    pdf_io = BytesIO() if pdf_path is None else pdf_path
    logger = proglog.default_bar_logger(logger, min_time_interval=0.2)
    with PdfPages(pdf_io) as pdf:
        for record in logger.iter_bar(sequence=breaches_records):
            ax = plot_breaches_record(record, figure_width=figure_width)
            try:
                pdf.savefig(ax.figure, bbox_inches="tight")
            finally:
                # Close the figure even if saving the page failed, so
                # figures do not pile up in pyplot's registry.
                plt.close(ax.figure)
    if pdf_path is None:
        return pdf_io.getvalue()
    return None


# Example list of constraint specifications: ten ``AvoidPattern`` entries
# (built from their pattern names, in order), followed by one hairpin
# constraint and one windowed GC-content constraint.
EXAMPLE_MANUFACTURING_CONSTRAINTS = [
    AvoidPattern(pattern_name)
    for pattern_name in (
        "BsaI_site",
        "BsmBI_site",
        "BbsI_site",
        "SapI_site",
        "9xA",
        "9xT",
        "6xG",
        "6xC",
        "5x3mer",
        "9x2mer",
    )
] + [
    AvoidHairpins(stem_size=20, hairpin_window=200),
    EnforceGCContent(mini=0.3, maxi=0.7, window=100),
]