In [1]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 
In [2]:
pdb=pmx.Model('pluslig.B99990001.pdb')

for r in pdb.residues[135:]:
    print r #посмотрим остатки   
<Molecule: id = 136 name = ASN chain_id =    natoms = 8>
<Molecule: id = 137 name = ARG chain_id =    natoms = 11>
<Molecule: id = 138 name = ASP chain_id =    natoms = 8>
<Molecule: id = 139 name = VAL chain_id =    natoms = 7>
<Molecule: id = 140 name = SER chain_id =    natoms = 6>
<Molecule: id = 141 name = GLN chain_id =    natoms = 9>
<Molecule: id = 142 name = TYR chain_id =    natoms = 12>
<Molecule: id = 143 name = VAL chain_id =    natoms = 7>
<Molecule: id = 144 name = GLN chain_id =    natoms = 9>
<Molecule: id = 145 name = GLY chain_id =    natoms = 4>
<Molecule: id = 146 name = CYS chain_id =    natoms = 6>
<Molecule: id = 147 name = GLY chain_id =    natoms = 4>
<Molecule: id = 148 name = VAL chain_id =    natoms = 8>
<Molecule: id = 149 name = NAG chain_id =    natoms = 14>
<Molecule: id = 150 name = NAG chain_id =    natoms = 14>
<Molecule: id = 151 name = NDG chain_id =    natoms = 15>
In [3]:
newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
    newpdb.remove_residue(r)
lig = pdb.copy()
del lig.residues[:-3]
In [4]:
#будем считать за центр среднее по всем координатам
coordinates = []
for a in lig.atoms:
    coordinates.append(a.x)
center = np.mean(coordinates,axis=0)
print(center)
[ 40.4829129   31.7626005   23.15076884]
In [5]:
newpdb.writePDB('newpdb.pdb')
lig.writePDB('lig.pdb')

Подготовка белка

In [6]:
prot = oddt.toolkit.readfile('pdb','newpdb.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()

print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 16400.43442
In [7]:
print(' ¯\_(ツ)_/¯ ')
 ¯\_(ツ)_/¯ 

Лиганды

NAG Smiles: O=C(NC1C(O)C(O)C(OC1O)CO)C

Метильный радикал: CH3

In [8]:
smiles = ['OC(=O)NC1C(C(C(OC1O)CO)O)O',               #OH
          '[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O',          #NH3+
          'C(=O)NC1C(C(C(OC1O)CO)O)O',                #H
          'C1=CC=[C]C=C1C(=O)NC1C(C(C(OC1O)CO)O)O',   #PH
          '[O-]C(=O)C(=O)NC1C(C(C(OC1O)CO)O)O']       #COO
#smiles = ['c1cccc(O)c1', 'c1c(O)ccc(O)c1','c1(O)cc(c2ccccc2)cc(O)c1']
mols= []
images =[]

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    images.append((SVG(copy.deepcopy(m).write('svg'))))

display_svg(*images)  
***** - Open Babel Depiction O H H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H H H H O O O N O O N O *****
***** - Open Babel Depiction O H H H H H O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O O N O HO *****

Докинг

In [9]:
#create docking object
dock_obj = oddt.docking.AutodockVina.autodock_vina(
    protein=prot, size=(20,20,20), center=center, num_modes = 5,
    executable='/usr/bin/vina',autocleanup=True)

print dock_obj.tmp_dir
print " ".join(dock_obj.params) # Опишите выдачу
/tmp/autodock_vina_GNyuHn
--center_x 40.4829128978 --center_y 31.7626005025 --center_z 23.1507688442 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 5 --energy_range 3
In [10]:
# do it
res = dock_obj.dock(mols,prot)

Результаты

In [13]:
import pandas as pd
In [42]:
subs=['OH']*5+['NH3+']*5+['H']*5+['Ph']*5+['COO-']*5
tablet = np.array([[i,r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name] for i,r in enumerate(res)])
tablet=np.column_stack((tablet, subs))
tablet=pd.DataFrame(tablet)
tablet.columns=['number', 'formula','affinity','rmsd','res', 'replaced by']
tablet_sorted = tablet.sort_values(by='affinity', ascending=False)
print (tablet_sorted)
   number    formula affinity   rmsd  res replaced by
15     15  C13H17NO6     -6.9  0.000  UNL          Ph
16     16  C13H17NO6     -6.6  6.473  UNL          Ph
17     17  C13H17NO6     -6.5  2.813  UNL          Ph
18     18  C13H17NO6     -6.4  7.448  UNL          Ph
19     19  C13H17NO6     -6.3  4.160  UNL          Ph
21     21   C8H13NO8     -6.1  4.032  UNL        COO-
20     20   C8H13NO8     -6.1  0.000  UNL        COO-
0       0   C7H13NO7     -5.9  0.000  UNL          OH
22     22   C8H13NO8     -5.9  7.961  UNL        COO-
23     23   C8H13NO8     -5.8  5.836  UNL        COO-
1       1   C7H13NO7     -5.8  4.819  UNL          OH
5       5  C7H15N2O6     -5.7  0.000  UNL        NH3+
2       2   C7H13NO7     -5.6  5.962  UNL          OH
24     24   C8H13NO8     -5.5  1.930  UNL        COO-
7       7  C7H15N2O6     -5.5  6.162  UNL        NH3+
6       6  C7H15N2O6     -5.5  5.023  UNL        NH3+
10     10   C7H13NO6     -5.4  0.000  UNL           H
3       3   C7H13NO7     -5.4  3.487  UNL          OH
11     11   C7H13NO6     -5.3  4.628  UNL           H
9       9  C7H15N2O6     -5.3  5.906  UNL        NH3+
8       8  C7H15N2O6     -5.3  4.999  UNL        NH3+
12     12   C7H13NO6     -5.3  4.742  UNL           H
4       4   C7H13NO7     -5.2  3.234  UNL          OH
14     14   C7H13NO6     -5.1  5.904  UNL           H
13     13   C7H13NO6     -5.1  4.441  UNL           H
In [43]:
for i,r in enumerate(res):
    hbs = oddt.interactions.hbonds(prot,r)
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)
In [45]:
for i,r in enumerate(res):
    r.write(filename='r%s.pdb' % i, format='pdb', overwrite=True)

Визуализация

Нас интересует №15 (наименьший показатель аффинности)

In [46]:
import __main__
__main__.pymol_argv = ['pymol','-qc'] # Pymol: quiet and no GUI
from time import sleep
import pymol
pymol.finish_launching()
In [86]:
pymol.cmd.reinitialize()
pymol.cmd.do('''
    load r15.pdb
    load newpdb.pdb   
    show cartoon, newpdb
    hide lines, newpdb
    show sticks, r15
    orient
    zoom r15, 10    
    png picture.png
''')
In [87]:
Image(filename='picture.png')
Out[87]:

Мы могли бы применить RFscore и NNscore для подсчета score докинга; предсказывать аффиность с помощью svm и др. моделей.