In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
from rdkit.Chem.Draw import IPythonConsole 
from rdkit.Chem import Draw
import rdkit.Chem.Lipinski as Lipinksy

import numpy as np
from IPython.display import display,Image

Ибупрофен

In [2]:
ibu = Chem.MolFromSmiles('CC(C)CC1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(ibu)
display(ibu)

Модифицированный ибупрофен

Заменяем изопропил (в нашем случае кусок CC(C)) на этин он же ацителен (C#C)

In [3]:
ibu_mod = Chem.MolFromSmiles('C#CCC1=CC=C(C=C1)C(C)C(=O)O')
AllChem.Compute2DCoords(ibu_mod)
display(ibu_mod)

Имитируем Click Chemistry

In [4]:
click_ibu_smile = 'N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O'
click_ibu = Chem.MolFromSmiles(click_ibu_smile)
AllChem.Compute2DCoords(click_ibu)
display(click_ibu)

Правила Липински

  • Иметь менее 5 атомов-доноров водородной связи
  • Иметь суммарно не более 10 атомов азота и кислорода (грубая оценка количества акцепторов водородной связи)
  • Обладать молекулярным весом менее 500
  • Иметь липофильность (log P — коэффициент распределения вещества на границе раздела вода-октанол) менее 5
In [5]:
print(Lipinksy.NumHDonors(ibu))
print(Lipinksy.NumHAcceptors(ibu))
print(Lipinksy.rdMolDescriptors.CalcExactMolWt(ibu))
print(Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(ibu)[0])
1
1
206.130679816
3.073200000000001

Поиск на сайте PubChem радикалов c азидом для Click Chemistry

На сайте PubChem был произведен поиск радикалов с азидом по формуле SMILES вида [N-]=[N+]=[N-&!R].

In [6]:
strings = np.genfromtxt('/Users/marina/Desktop/hse/drug_design/hw3/4105545150403456955.txt', dtype=np.str)
print('Num of molecules is ' + str(len(strings)) + '.')

smiles=[]

for line in strings:
    if len(line[1]) < 30 and not '.' in line[1]:
        smiles.append(line[1])
        
print('Num of compatible molecules is ' + str(len(smiles)) + '.')
Num of molecules is 131997.
Num of compatible molecules is 11249.
In [7]:
newsmi = []
azid = 'N=[N+]=[N-]'

for smi in smiles:
    if azid in smi:
        newsmi.append(smi.replace(azid, click_ibu_smile))
    else:
        continue
        
print('Num of compatible molecules is ' + str(len(newsmi)) + '.')
Num of compatible molecules is 7779.
In [8]:
lip5_smi = []
lip5_mol = []

for smi in newsmi:
    try:
        newmol = Chem.MolFromSmiles(smi)

        if Lipinksy.NumHDonors(newmol) < 5 and \
        Lipinksy.rdMolDescriptors.CalcExactMolWt(newmol) < 500 and \
        Lipinksy.rdMolDescriptors.CalcCrippenDescriptors(newmol)[0] < 5 and \
        Lipinksy.NumHAcceptors(newmol) <= 10:
            
            AllChem.Compute2DCoords(newmol)
            lip5_smi.append(smi)
            lip5_mol.append(newmol)
                
    except:
        pass
RDKit ERROR: [18:48:03] SMILES Parse Error: extra open parentheses for input: 'C1=C(C(=C(N1)N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:03] Explicit valence for atom # 4 Cl, 3, is greater than permitted
RDKit ERROR: [18:48:05] SMILES Parse Error: extra open parentheses for input: 'CC1=C(C=CC(=C1CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:06] SMILES Parse Error: extra open parentheses for input: 'CC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(COC(=O)CCCC'
RDKit ERROR: [18:48:06] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=C(C=C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:06] SMILES Parse Error: extra open parentheses for input: 'CC1C=C(C=CC1OCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C(C'
RDKit ERROR: [18:48:07] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=C(C=C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:07] SMILES Parse Error: extra open parentheses for input: 'CC1=NC(=C(C(=C1)N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=CC=C1C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=CC=C1C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'CCOC(=O)C(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'CCOC(=O)C(CCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(CC'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'C1=C(C=C(C=C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'CC(CCC(=O)OCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'CC1=C(C(C(C(C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:08] SMILES Parse Error: extra open parentheses for input: 'C1=CC(C(C(C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: '[B](N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)[P+](CS'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'C1CC([C@@H](CC1CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'CC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'C1=C(N(C(=N1)CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'CC(CCC(=O)OCCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'CCC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(/C(=C/C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'CC(CCCCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'CC(CCCCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:09] SMILES Parse Error: extra open parentheses for input: 'CC1=C(C=CC(=C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:10] SMILES Parse Error: extra open parentheses for input: 'C1CC(CCC1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(CC'
RDKit ERROR: [18:48:10] SMILES Parse Error: extra open parentheses for input: 'CC(CCC(=O)OCCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:11] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=C(C=C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:11] SMILES Parse Error: extra open parentheses for input: 'CC1=NC(=C(C(=C1)N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'CC=CC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'CCC=CC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'CCC=CC(CCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'C(CNC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=CC=C1NC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=CC=C1NC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'C(CNC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:12] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=CC=C1NC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:14] SMILES Parse Error: extra open parentheses for input: 'C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)CN(CC'
RDKit ERROR: [18:48:17] Explicit valence for atom # 1 Cl, 2, is greater than permitted
RDKit ERROR: [18:48:18] SMILES Parse Error: extra open parentheses for input: 'CC1=CC(=C(C(=C1)N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:18] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=C(C=C1CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:18] SMILES Parse Error: extra open parentheses for input: 'CC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)CN1CCC(CC1)(C'
RDKit ERROR: [18:48:18] SMILES Parse Error: extra open parentheses for input: 'C1=C(N=C(C(=N1)N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:19] SMILES Parse Error: extra open parentheses for input: 'C1=CC(=C(C=C1CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:20] SMILES Parse Error: extra open parentheses for input: 'CC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'CC/C=C/CCC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'CC(=CCC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'C/C=C/CC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'CCC(CCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'CCC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'C[Si](C)(C)OC(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'CC/C=C\CCC(CCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'CC(=CCC(CCCN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:21] SMILES Parse Error: extra open parentheses for input: 'C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)[C@@H](C'
RDKit ERROR: [18:48:22] SMILES Parse Error: extra open parentheses for input: 'COC[C@@](CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)(C'
RDKit ERROR: [18:48:22] SMILES Parse Error: extra open parentheses for input: 'C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)CO/C(=C/[N+]'
RDKit ERROR: [18:48:22] SMILES Parse Error: extra open parentheses for input: 'C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)CO/C(=C/[N+]'
RDKit ERROR: [18:48:22] SMILES Parse Error: extra open parentheses for input: 'C1=C(C=NC(=C1N1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)C'
RDKit ERROR: [18:48:23] SMILES Parse Error: extra open parentheses for input: 'C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)COC(=C[N+]'
RDKit ERROR: [18:48:23] SMILES Parse Error: extra open parentheses for input: 'C(CN1C=C(N=N1)C1=CC=C(C=C1)C(C)C(=O)O)COC(=C[N+]'
In [9]:
print('Num of compatible molecules is ' + str(len(lip5_mol)) + '.')
Num of compatible molecules is 7564.

Рисуем кандидатов

In [10]:
img = Draw.MolsToGridImage(lip5_mol[:64], molsPerRow=8, subImgSize=(300, 300))
img
Out[10]:

Найдем молекулу с минимальной молекулярной массой и отрисуем ее.

In [11]:
min_mol = ''
min_mass = 500

for m in lip5_mol:
    
    if min_mass >= Lipinksy.rdMolDescriptors.CalcExactMolWt(m) and \
    Lipinksy.rdMolDescriptors.CalcExactMolWt(click_ibu) != Lipinksy.rdMolDescriptors.CalcExactMolWt(m):
        
                min_mass = Lipinksy.rdMolDescriptors.CalcExactMolWt(m)
                min_mol = m
                
print('Min molecular weight ' + str(min_mass) + '.')
Min molecular weight 218.091403338.
In [12]:
display(min_mol)
In [13]:
m3d = Chem.AddHs(min_mol)
Chem.AllChem.EmbedMolecule(m3d)
AllChem.MMFFOptimizeMolecule(m3d, maxIters = 500, nonBondedThresh = 200)
m3d
Out[13]:
In [35]:
import nglview as nv
nv.show_rdkit(m3d)

nglview отображает, но изображение пропадает в ipynb.

Ниже скриншот, как это работает у меня.

In [37]:
Image(filename = '/Users/marina/Desktop/hse/drug_design/hw3/view.png')
Out[37]: