import os
from io import StringIO, BytesIO
import json
import pandas as pd
import urllib.request
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.molSize=(450, 200)
from rdkit.Chem import rdRGroupDecomposition, rdqueries, rdDepictor, PandasTools
from rdkit.Chem.Draw import rdMolDraw2D, IPythonConsole
from rdkit import Geometry
from IPython.display import Image
from PIL import Image as pilImage

IPythonConsole.InteractiveRenderer.rdkitStructureRendererJsUrl = "https://unpkg.com/rdkit-structure-renderer@0.0.1/dist/rdkit-structure-renderer-module.js"
IPythonConsole.InteractiveRenderer.minimalLibJsUrl = "https://unpkg.com/rdkit-structure-renderer@0.0.1/public/RDKit_minimal.0.0.1.js"

IPythonConsole.InteractiveRenderer.setEnabled()

import rdkit
print(rdkit.__version__)

2024.09.1pre

cdk2_rgd_dataset = "cdk2_rgd_dataset.csv"

if not os.path.exists(cdk2_rgd_dataset):
    rdDepictor.SetPreferCoordGen(True)
    cdk2_rgd_dataset_csv_url = "https://api.hub.knime.com/repository/Users/greglandrum/Public/Presentations/2020/2020_11_05_RDKitPython_Webinar/Data/cdk2_rgd_dataset.csv:data?version=current-state"
    response = urllib.request.urlopen(cdk2_rgd_dataset_csv_url)
    assert response.status == 200
    with StringIO(response.read().decode("utf-8")) as hnd_in:
        with open(cdk2_rgd_dataset, "w") as hnd_out:
            hnd_out.write(hnd_in.read())

assert os.path.exists(cdk2_rgd_dataset)

with open(cdk2_rgd_dataset) as hnd:
    df = pd.read_csv(hnd)

df_doc1 = df[df.assay_chembl_id == 'CHEMBL827377']

chembl_ids = df_doc1.compound_chembl_id.to_list()
mols = df_doc1["canonical_smiles"].apply(Chem.MolFromSmiles).to_list()

core = Chem.MolFromSmiles("c1cc(-c2c([*:1])nn3nc([*:2])ccc23)nc(N(c2ccc([*:4])c([*:3])c2))n1")
rdDepictor.Compute2DCoords(core)
core

params = rdRGroupDecomposition.RGroupDecompositionParameters()
params.includeTargetMolInResults = True

rdkit.RDLogger.DisableLog("rdApp.warning")
groups, _ = rdRGroupDecomposition.RGroupDecompose([core], mols, asSmiles=False, asRows=True, options=params)
rdkit.RDLogger.EnableLog("rdApp.warning")

groups[0]

{'Core': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a05f0>,
 'Mol': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a0740>,
 'R1': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a07b0>,
 'R2': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a0820>,
 'R3': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a0890>,
 'R4': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a0900>,
 'R5': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a0970>,
 'R6': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a09e0>,
 'R7': <rdkit.Chem.rdchem.Mol at 0x7f1b8a0a0a50>}

class RGroupHighLights:
    """Class to extract and store highlight maps from R groups."""

    def __init__(self):
        self.atom_map = {}
        self.bond_map = {}
        self.radii = {}
        self.linewidth_multipliers = {}
        # ------------------
        #  set up our colormap
        #   the three choices here are all "colorblind" colormaps

        # "Tol" colormap from https://davidmathlogic.com/colorblind
        colors = [(51,34,136), (17,119,51), (68,170,153), (136,204,238), (221,204,119), (204,102,119), (170,68,153), (136,34,85)]
        # "IBM" colormap from https://davidmathlogic.com/colorblind
        colors = [(100, 143, 255), (120, 94, 240), (220, 38, 127), (254, 97, 0), (255, 176, 0)]
        # Okabe_Ito colormap from https://jfly.uni-koeln.de/color/
        colors = [(230, 159, 0),(86, 180, 233), (0, 158, 115), (240, 228, 66), (0, 114, 178), (213, 94, 0), (204, 121, 167)]
        for i, x in enumerate(colors):
            colors[i] = tuple(y/255 for y in x)
        self.colors = colors

    def update(self, rgroup, rlabel_idx):
        """Update highlights with information from this R group"""
        target_atoms = json.loads(rgroup.GetProp("_rgroupTargetAtoms")) if rgroup.HasProp("_rgroupTargetAtoms") else []
        target_bonds = json.loads(rgroup.GetProp("_rgroupTargetBonds")) if rgroup.HasProp("_rgroupTargetBonds") else []
        color = self.colors[rlabel_idx % len(self.colors)]
        for i in target_atoms:
            v = self.atom_map.get(i, [])
            v.append(color)
            self.atom_map[i] = v
            self.radii[i] = 0.4
        for i in target_bonds:
            v = self.bond_map.get(i, [])
            v.append(color)
            self.bond_map[i] = v
            self.linewidth_multipliers[i] = 2

def highlight_rgroups(row, width=350, height=200, legend="",
                      labels=("R1", "R2", "R3", "R4"), svg=False):
    """
    Params:
    row: a dict whose keys are Mol, Core and all R extracted groups
    (i.e., an element of the array generated by rdRGroupDecomposition.RGroupDecompose
    width, height: size of each image
    legend: optional legend to be associated to the mol image
    labels: labels to be associated to highlights
    svg: if True an SVG is generated, if False a PNG
    """
    mol = row["Mol"]
    core = row["Core"]
    ps = Chem.AdjustQueryParameters.NoAdjustments()
    ps.makeDummiesQueries = True
    qcore = Chem.AdjustQueryProperties(core, ps)
    highlights = RGroupHighLights()
    for rlabel, rgroup in row.items():
        if rlabel not in labels:
            continue
        rlabel_idx = labels.index(rlabel)
        highlights.update(rgroup, rlabel_idx)
    rdDepictor.GenerateDepictionMatching2DStructure(mol, qcore, allowRGroups=True)
    if svg:
        drawer = rdMolDraw2D.MolDraw2DSVG
    else:
        drawer = rdMolDraw2D.MolDraw2DCairo
    d2d = drawer(width, height)
    d2d.drawOptions().useBWAtomPalette()
    #----------------------
    # now draw the molecule, with highlights:
    d2d.DrawMoleculeWithHighlights(mol, legend, highlights.atom_map, highlights.bond_map,
                                   highlights.radii, highlights.linewidth_multipliers)
    d2d.FinishDrawing()
    img = d2d.GetDrawingText()
    return img

def draw_multiple(groups, labels, legends=None, nPerRow=4, subImageSize=(250, 200)):
    nRows = (len(groups) - 1) // nPerRow + 1
    imgSize = (subImageSize[0] * nPerRow, subImageSize[1] * nRows)
    res = pilImage.new("RGB", imgSize, color="white")
    
    for i, group in enumerate(groups):
        col = i % nPerRow
        row = i // nPerRow
        if legends:
            legend = legends[i]
        else:
            legend = ''
        png = highlight_rgroups(group, labels=labels, legend=legend,
                               width=subImageSize[0], height=subImageSize[1])
        with BytesIO(png) as hnd:
            img = pilImage.open(hnd)
            res.paste(img, box=(col * subImageSize[0], row * subImageSize[1]))
    with BytesIO() as hnd:
        res.save(hnd, format="PNG")
        png = hnd.getvalue()
    return png

Image(draw_multiple(groups, labels=("R1", "R2", "R3", "R4"), legends=chembl_ids, subImageSize=(300, 250)))

PandasTools.molSize = (200, 100)

bw_palette = {str(i): [0, 0, 0] for i in range(120)}
for group in groups:
    highlights = RGroupHighLights()
    for rlabel_idx, rlabel in enumerate(("R1", "R2", "R3", "R4")):
        highlights.update(group[rlabel], rlabel_idx)
    draw_opts_json = json.dumps({
        "atomColourPalette": bw_palette,
        "highlightAtomMultipleColors": highlights.atom_map,
        "highlightBondMultipleColors": highlights.bond_map,
        "highlightAtomRadii": highlights.radii,
        "highlightLineWidthMultipliers": highlights.linewidth_multipliers
    })
    IPythonConsole.InteractiveRenderer.setOpts(group["Mol"], {
        "width": 300,
        "height": 200,
        "drawOpts": draw_opts_json
    })

df = pd.DataFrame([{k: group[k] for k in ("Mol", "R1", "R2", "R3", "R4")} for group in groups])

df.insert(0, "ChEMBL id", [chembl_ids[i] for i in df.index])

df = df.set_index("ChEMBL id")

PandasTools.ChangeMoleculeRendering(df)

df

	Mol	R1	R2	R3	R4
ChEMBL id
CHEMBL182493
CHEMBL182326
CHEMBL183064
CHEMBL361038
CHEMBL362296
...	...	...	...	...	...
CHEMBL364646
CHEMBL189874
CHEMBL186826
CHEMBL434158
CHEMBL434729

R-Group Decomposition highlighting made easier (and exposed to JS)¶