Домашнее задание 7

In [1]:
import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions
from tabulate import tabulate

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

Подготовим белок:

In [3]:
pdb=pmx.Model('P08905_with_ligand.B99990001.pdb')
for r in pdb.residues[135:]:
    print r #посмотрим остатки
<Molecule: id = 136 name = ARG chain_id =    natoms = 11>
<Molecule: id = 137 name = ASP chain_id =    natoms = 8>
<Molecule: id = 138 name = LEU chain_id =    natoms = 8>
<Molecule: id = 139 name = SER chain_id =    natoms = 6>
<Molecule: id = 140 name = GLN chain_id =    natoms = 9>
<Molecule: id = 141 name = TYR chain_id =    natoms = 12>
<Molecule: id = 142 name = ILE chain_id =    natoms = 8>
<Molecule: id = 143 name = ARG chain_id =    natoms = 11>
<Molecule: id = 144 name = ASN chain_id =    natoms = 8>
<Molecule: id = 145 name = CYS chain_id =    natoms = 6>
<Molecule: id = 146 name = GLY chain_id =    natoms = 4>
<Molecule: id = 147 name = VAL chain_id =    natoms = 8>
<Molecule: id = 148 name = NAG chain_id =    natoms = 14>
<Molecule: id = 149 name = NAG chain_id =    natoms = 14>
<Molecule: id = 150 name = NDG chain_id =    natoms = 15>
In [4]:
# создание объектов белок и лиганда
X=3
newpdb = pdb.copy()
for r in newpdb.residues[-X:]:
    newpdb.remove_residue(r)
lig = pdb.copy()
del lig.residues[:-X]

Все успешно разделилось:

In [5]:
for i in lig.residues:
    print i
<Molecule: id = 148 name = NAG chain_id =    natoms = 14>
<Molecule: id = 149 name = NAG chain_id =    natoms = 14>
<Molecule: id = 150 name = NDG chain_id =    natoms = 15>

Найдем геометрический центр лиганда:

In [6]:
s=np.zeros(3)
for a in lig.atoms:
    s=s+a.x
centr=s/len(lig.atoms)
print centr # найдите геометрический центр лиганда
#пишем белок без лиганда
newpdb.writePDB('newpdb.pdb')
[ 41.81467857  31.92851578  23.82723671]

Подготовка белка для докинга:

In [7]:
prot = oddt.toolkit.readfile('pdb','newpdb.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()
mols=[]

print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 16549.5497

Функция отрисовки смайлов:

In [8]:
def drawSmile(smiles,mols):
    images =[]

    for s in smiles:
        m = oddt.toolkit.readstring('smi', s)
        if not m.OBMol.Has3D(): 
            m.make3D(forcefield='mmff94', steps=150)
            m.removeh()
            m.OBMol.AddPolarHydrogens()

        mols.append(m)
        ###with print m.OBMol.Has3D() was found that:
        ### deep copy needed to keep 3D , write svg make mols flat
        images.append((SVG(copy.deepcopy(m).write('svg'))))

    display_svg(*images)
In [9]:
smiles = ['c1cccc(O)c1', 'c1c(O)ccc(O)c1','c1(O)cc(c2ccccc2)cc(O)c1']
drawSmile(smiles,mols)
***** - Open Babel Depiction O H *****
***** - Open Babel Depiction O O H H *****
***** - Open Babel Depiction O O H H *****

Функция для докинга:

In [10]:
def docking(centr,prot,mols):
    #create docking object
    xx=centr[0]
    yy=centr[1]
    zz=centr[2]
    #установлен лимит в 1-9 моделей
    dock_obj= oddt.docking.AutodockVina.autodock_vina(
        protein=prot,size=(20,20,20),center=[xx,yy,zz],
        executable='/usr/bin/vina',autocleanup=True, num_modes=9)
    
    print dock_obj.tmp_dir
    print " ".join(dock_obj.params) # Опишите выдачу

    # do it
    res = dock_obj.dock(mols,prot)
    
    #анализ докинга
    for i,r in enumerate(res):
        hbs = oddt.interactions.hbonds(prot,r)
        stack= oddt.interactions.pi_stacking(prot,r)
        phob = oddt.interactions.hydrophobic_contacts(prot,r)

    return res

Функция для результатов докинга:

Визуализация

In [11]:
def results(res,n):
    param=len(res)/n
    d_affinity = [[] for x in xrange(n)]
    d_rmsd=[[] for x in xrange(n)]
    names=[]
    for i,r in enumerate(res):
        print "%4d%10s%8s%8s%8s" %(i,r.formula, r.data['vina_affinity'],  r.data['vina_rmsd_ub'], r.residues[0].name)
        d_affinity[i / param].append(r.data['vina_affinity'])
        d_rmsd[i / param].append(r.data['vina_rmsd_ub'])
        names.append(r.formula)
    setlist=set(names)
    # на выходе - данные по афинности, rmsd и названия молекул
    return d_affinity,d_rmsd,list(setlist)
In [12]:
def visualization(res,name):
    for i,r in enumerate(res):
        r.write(filename=name % i, format='pdb',overwrite=True)
        
    
#pymol .pdb r*pdb

Функция для создания красивого вывода таблиц:

In [13]:
def tablichka(affin,rmsd,names):
    row_format ="{:>15}" * (len(names) + 1)
    rows=list(range(0,len(affin)*len(affin[0])))
    print ("AFFINITY")
    print tabulate(map(list, zip(*affin)),headers=names)
    print ("===================================")
    print ("RMSD")
    print tabulate(map(list, zip(*rmsd)),headers=names)
    
    
In [14]:
res=docking(centr,prot,mols)
/tmp/autodock_vina_UwDQy7
--center_x 41.8146785714 --center_y 31.9285157807 --center_z 23.827236711 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
In [17]:
affin,rmsd,names=results(res,3)
   0     C6H6O    -4.2   0.000     UNL
   1     C6H6O    -4.2   1.829     UNL
   2     C6H6O    -4.0   2.145     UNL
   3     C6H6O    -4.0   1.797     UNL
   4     C6H6O    -3.9   2.560     UNL
   5     C6H6O    -3.9   3.188     UNL
   6     C6H6O    -3.4   2.496     UNL
   7     C6H6O    -3.2   4.389     UNL
   8     C6H6O    -3.2   4.256     UNL
   9    C6H6O2    -4.3   0.000     UNL
  10    C6H6O2    -4.3   1.714     UNL
  11    C6H6O2    -4.3   3.656     UNL
  12    C6H6O2    -4.3   3.231     UNL
  13    C6H6O2    -4.1   3.348     UNL
  14    C6H6O2    -4.0   1.405     UNL
  15    C6H6O2    -4.0   3.016     UNL
  16    C6H6O2    -3.6   2.923     UNL
  17    C6H6O2    -3.5   2.184     UNL
  18  C12H10O2    -6.1   0.000     UNL
  19  C12H10O2    -6.1   4.964     UNL
  20  C12H10O2    -6.0   2.188     UNL
  21  C12H10O2    -5.9   3.924     UNL
  22  C12H10O2    -5.7   2.615     UNL
  23  C12H10O2    -5.4   4.880     UNL
  24  C12H10O2    -5.3   5.651     UNL
  25  C12H10O2    -5.1   3.845     UNL
  26  C12H10O2    -4.9   5.486     UNL
In [15]:
visualization(res,'r%s.pdb')
In [18]:
tablichka(affin,rmsd,names)
AFFINITY
  C6H6O2    C6H6O    C12H10O2
--------  -------  ----------
    -4.2     -4.3        -6.1
    -4.2     -4.3        -6.1
    -4       -4.3        -6
    -4       -4.3        -5.9
    -3.9     -4.1        -5.7
    -3.9     -4          -5.4
    -3.4     -4          -5.3
    -3.2     -3.6        -5.1
    -3.2     -3.5        -4.9
===================================
RMSD
  C6H6O2    C6H6O    C12H10O2
--------  -------  ----------
   0        0           0
   1.829    1.714       4.964
   2.145    3.656       2.188
   1.797    3.231       3.924
   2.56     3.348       2.615
   3.188    1.405       4.88
   2.496    3.016       5.651
   4.389    2.923       3.845
   4.256    2.184       5.486

Чем меньше афинность, тем лучше. Поэтому видно, что для C12H10O2 1 вариант в таблице со значением афинности -6.1 и rmsd 0 будет наилучшим. Изобразим белок и лиганд:

In [21]:
Image(filename="Screenshot_1.png")
Out[21]:

Создадим лиганды где метильный радикал СH3C(=O)NH группы будет заменен на следующие группы:

  • OH
  • NH3+
  • H
  • Ph
  • COO-

OH

In [22]:
smile=(['C1(C(C(C(C(O1)CO[H])O[H])O[H])O[H])[OH]'])
mols=[]
drawSmile(smile,mols) 
***** - Open Babel Depiction O H H H H H O O O O O *****
In [23]:
res=docking(centr,prot,mols)
affin,rmsd,names=results(res,1)
/tmp/autodock_vina_yQ0vUk
--center_x 41.8146785714 --center_y 31.9285157807 --center_z 23.827236711 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
   0   C6H12O6    -5.3   0.000     UNL
   1   C6H12O6    -5.2   2.846     UNL
   2   C6H12O6    -5.1   2.230     UNL
   3   C6H12O6    -5.0   3.009     UNL
   4   C6H12O6    -5.0   3.251     UNL
   5   C6H12O6    -4.7   3.556     UNL
   6   C6H12O6    -4.6   3.580     UNL
   7   C6H12O6    -4.6   2.004     UNL
   8   C6H12O6    -4.5   3.877     UNL
In [24]:
tablichka(affin,rmsd,names)
AFFINITY
  C6H12O6
---------
     -5.3
     -5.2
     -5.1
     -5
     -5
     -4.7
     -4.6
     -4.6
     -4.5
===================================
RMSD
  C6H12O6
---------
    0
    2.846
    2.23
    3.009
    3.251
    3.556
    3.58
    2.004
    3.877

Первый вариант - лучший.

In [26]:
visualization(res,'r_oh%s.pdb')

Пристраиваем белок:

In [27]:
Image("oh.png")
Out[27]:

NH3+

In [28]:
smile=(['C1(C(C(C(C(O1)CO[H])O[H])O[H])O[H])[NH3+]'])
mols=[]
drawSmile(smile,mols) 
***** - Open Babel Depiction O H H H H H H H + N O O O O *****
In [29]:
res=docking(centr,prot,mols)
affin,rmsd,names=results(res,1)
/tmp/autodock_vina_352XsW
--center_x 41.8146785714 --center_y 31.9285157807 --center_z 23.827236711 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
   0  C6H14NO5    -5.3   0.000     UNL
   1  C6H14NO5    -5.1   3.050     UNL
   2  C6H14NO5    -5.1   2.152     UNL
   3  C6H14NO5    -5.0   3.306     UNL
   4  C6H14NO5    -4.9   2.665     UNL
   5  C6H14NO5    -4.6   3.530     UNL
   6  C6H14NO5    -4.6   3.935     UNL
   7  C6H14NO5    -4.6   3.572     UNL
   8  C6H14NO5    -4.5   2.111     UNL
In [30]:
tablichka(affin,rmsd,names)
AFFINITY
  C6H14NO5
----------
      -5.3
      -5.1
      -5.1
      -5
      -4.9
      -4.6
      -4.6
      -4.6
      -4.5
===================================
RMSD
  C6H14NO5
----------
     0
     3.05
     2.152
     3.306
     2.665
     3.53
     3.935
     3.572
     2.111
In [31]:
visualization(res,"r_nh3%s.pdb")
In [33]:
Image('nh3.png')
Out[33]:

H

In [34]:
smile=(['C1(C(C(C(C(O1)CO[H])O[H])O[H])O[H])[H]'])
mols=[]
drawSmile(smile,mols) 
***** - Open Babel Depiction O O O O O H H H H *****
In [35]:
res=docking(centr,prot,mols)
affin,rmsd,names=results(res,1)
/tmp/autodock_vina_sdJ2dg
--center_x 41.8146785714 --center_y 31.9285157807 --center_z 23.827236711 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
   0   C6H12O5    -5.0   0.000     UNL
   1   C6H12O5    -4.8   2.252     UNL
   2   C6H12O5    -4.8   3.420     UNL
   3   C6H12O5    -4.7   2.626     UNL
   4   C6H12O5    -4.3   2.417     UNL
   5   C6H12O5    -4.3   4.140     UNL
   6   C6H12O5    -4.2   3.051     UNL
   7   C6H12O5    -4.2   2.188     UNL
   8   C6H12O5    -4.2   3.104     UNL
In [36]:
tablichka(affin,rmsd,names)
AFFINITY
  C6H12O5
---------
     -5
     -4.8
     -4.8
     -4.7
     -4.3
     -4.3
     -4.2
     -4.2
     -4.2
===================================
RMSD
  C6H12O5
---------
    0
    2.252
    3.42
    2.626
    2.417
    4.14
    3.051
    2.188
    3.104
In [37]:
visualization(res,"r_h%s.pdb")
In [38]:
Image('h.png')
Out[38]:

Ph

In [39]:
smile=(['C1(C(C(C(C(O1)O[H])O[H])O[H])C2CCCCC2)O[H]'])
mols=[]
drawSmile(smile,mols) 
***** - Open Babel Depiction H H H H O O O O O *****
In [40]:
res=docking(centr,prot,mols)
affin,rmsd,names=results(res,1)
/tmp/autodock_vina_dGZiYN
--center_x 41.8146785714 --center_y 31.9285157807 --center_z 23.827236711 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
   0  C11H20O5    -5.7   0.000     UNL
   1  C11H20O5    -5.6   2.945     UNL
   2  C11H20O5    -5.4   3.253     UNL
   3  C11H20O5    -5.1   4.384     UNL
   4  C11H20O5    -5.0   3.633     UNL
   5  C11H20O5    -4.9   3.863     UNL
   6  C11H20O5    -4.7   3.640     UNL
   7  C11H20O5    -4.7   3.983     UNL
   8  C11H20O5    -4.7   5.115     UNL
In [41]:
tablichka(affin,rmsd,names)
AFFINITY
  C11H20O5
----------
      -5.7
      -5.6
      -5.4
      -5.1
      -5
      -4.9
      -4.7
      -4.7
      -4.7
===================================
RMSD
  C11H20O5
----------
     0
     2.945
     3.253
     4.384
     3.633
     3.863
     3.64
     3.983
     5.115
In [42]:
visualization(res,"r_ph%s.pdb")
In [43]:
Image("ph.png")
Out[43]:

COO-

In [44]:
smile=(['C1(C(C(C(C(O1)O[H])O[H])O[H])C(=O)O)O[H]'])
mols=[]
drawSmile(smile,mols) 
***** - Open Babel Depiction H H H H H O O O O O O O *****
In [45]:
res=docking(centr,prot,mols)
affin,rmsd,names=results(res,1)
/tmp/autodock_vina_yVHREA
--center_x 41.8146785714 --center_y 31.9285157807 --center_z 23.827236711 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3
   0   C6H10O7    -5.6   0.000     UNL
   1   C6H10O7    -5.5   3.526     UNL
   2   C6H10O7    -5.5   3.341     UNL
   3   C6H10O7    -5.0   3.129     UNL
   4   C6H10O7    -4.9   2.649     UNL
   5   C6H10O7    -4.8   4.560     UNL
   6   C6H10O7    -4.7   3.762     UNL
   7   C6H10O7    -4.2  14.973     UNL
   8   C6H10O7    -4.1  14.818     UNL
In [46]:
tablichka(affin,rmsd,names)
AFFINITY
  C6H10O7
---------
     -5.6
     -5.5
     -5.5
     -5
     -4.9
     -4.8
     -4.7
     -4.2
     -4.1
===================================
RMSD
  C6H10O7
---------
    0
    3.526
    3.341
    3.129
    2.649
    4.56
    3.762
   14.973
   14.818
In [47]:
visualization(res,"r_coo%s.pdb")
In [48]:
Image("coo.png")
Out[48]:

Из статьи , например , можно было бы использовать autodock vina для докинга.

In [ ]: