Structural bionformatics. Homework №7

In [1]:
import numpy as np
import pandas as pd
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb 

1. Подготовка белка

In [2]:
pdb=pmx.Model('ligand.B99990001.pdb')
for r in pdb.residues[135:]:
    print r
<Molecule: id = 136 name = NAG chain_id =    natoms = 14>
<Molecule: id = 137 name = NAG chain_id =    natoms = 14>
<Molecule: id = 138 name = NDG chain_id =    natoms = 15>
In [3]:
# создание объектов белок и лиганда
newpdb = pdb.copy()
for r in newpdb.residues[-3:]:
    newpdb.remove_residue(r)
lig = pdb.copy()
del lig.residues[:-3]
In [4]:
#будем считать за центр среднее по всем координатам
coordinates = []
for a in lig.atoms:
    coordinates.append(a.x)
center = np.mean(coordinates,axis=0)
print(center)
[ 41.651375    35.46804815  19.97598796]
In [6]:
newpdb.writePDB('new_prot.pdb')

2. Подготовка белка для докинга

In [7]:
prot = oddt.toolkit.readfile('pdb','new_prot.pdb').next()
prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()
print 'is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt 
is it the first mol in 1lmp is protein? False :) and MW of this mol is: 14947.60714

3. Лиганды для докинга
NAG содержит в себе СH3C(=O)NH группу.
Заменяем метильный (CH3) радикал этой группы на :
1) OH -> O
2) NH3+ -> [N+]
3) H ->
4) Ph -> C1=CC=[C]C=C1
5) COO- -> [-O]C(=O)
SMILES для NAG 'CC(=O)NC1C(C(C(OC1O)CO)O)O': http://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/NAG
Значит, с точки зрения записи меняться будет самый левый углерод.

In [8]:
#http://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/NAG
smilesNAG = 'CC(=O)NC1C(C(C(OC1O)CO)O)O'
smiles = ['OC(=O)NC1C(C(C(OC1O)CO)O)O',               #OH
          '[NH3+]C(=O)NC1C(C(C(OC1O)CO)O)O',          #NH3+
          'C(=O)NC1C(C(C(OC1O)CO)O)O',                #H
          'C1=CC=[C]C=C1C(=O)NC1C(C(C(OC1O)CO)O)O',   #PH
          '[O-]C(=O)C(=O)NC1C(C(C(OC1O)CO)O)O']       #COO  
mols = []
images = []

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    images.append((SVG(copy.deepcopy(m).write('svg'))))

display_svg(*images)
***** - Open Babel Depiction O H H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H H H H O O O N O O N O *****
***** - Open Babel Depiction O H H H H H O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O N O *****
***** - Open Babel Depiction H H H H H O O O O O O N O HO *****

4. Докинг

In [9]:
xx = center[0]
yy = center[1]
zz = center[2]
In [10]:
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=[xx,yy,zz],
    executable='/usr/bin/vina',autocleanup=True, num_modes=9)
print dock_obj.tmp_dir
/tmp/autodock_vina_dFtnBu
In [11]:
print('Center coordinates')
print " ".join(dock_obj.params[0:6])
print ("№ of points")
print " ".join(dock_obj.params[6:12])
print ("Number Of CPU Cores")
print " ".join(dock_obj.params[12:14])
print("Exhaustiveness parameter")
print " ".join(dock_obj.params[14:16])
print("Number of conformations generated by Autodock Vina")
print " ".join(dock_obj.params[16:18])
print("Energy range cutoff for Autodock Vina")
print " ".join(dock_obj.params[18:20])
Center coordinates
--center_x 41.651375 --center_y 35.4680481481 --center_z 19.975987963
№ of points
--size_x 20 --size_y 20 --size_z 20
Number Of CPU Cores
--cpu 1
Exhaustiveness parameter
--exhaustiveness 8
Number of conformations generated by Autodock Vina
--num_modes 9
Energy range cutoff for Autodock Vina
--energy_range 3
In [13]:
res = dock_obj.dock(mols,prot)

5. Результаты докинга

In [32]:
formula = []
affinity = []
rsmd = []
name = []
for i,r in enumerate(res):
    formula.append(r.formula)
    affinity.append(r.data['vina_affinity'])
    rsmd.append(r.data['vina_rmsd_ub'])
    name.append(r.residues[0].name)
In [46]:
radical = ['OH']*9+['NH3+']*9+['H']*9+['Ph']*9+['COO']*9
#data = np.array()
table = pd.DataFrame([radical,formula,affinity,rsmd,name])
table = table.transpose()
table = table.rename(index=str, columns={0: 'Radical', 1: 'Formula',2:'Affinity',3:'RSMD',4:'Name'})
table.sort_values(by='Affinity')
Out[46]:
Radical Formula Affinity RSMD Name
8 OH C7H13NO7 -6.2 2.888 UNL
26 H C7H13NO6 -6.3 3.419 UNL
25 H C7H13NO6 -6.3 1.644 UNL
6 OH C7H13NO7 -6.3 5.169 UNL
7 OH C7H13NO7 -6.3 5.321 UNL
4 OH C7H13NO7 -6.4 4.618 UNL
5 OH C7H13NO7 -6.4 4.723 UNL
24 H C7H13NO6 -6.4 3.271 UNL
22 H C7H13NO6 -6.5 5.120 UNL
3 OH C7H13NO7 -6.5 6.455 UNL
23 H C7H13NO6 -6.5 3.317 UNL
21 H C7H13NO6 -6.5 4.209 UNL
17 NH3+ C7H15N2O6 -6.5 2.779 UNL
16 NH3+ C7H15N2O6 -6.5 4.538 UNL
43 COO C8H13NO8 -6.6 2.224 UNL
20 H C7H13NO6 -6.6 6.050 UNL
44 COO C8H13NO8 -6.6 5.983 UNL
14 NH3+ C7H15N2O6 -6.6 5.220 UNL
15 NH3+ C7H15N2O6 -6.6 5.080 UNL
13 NH3+ C7H15N2O6 -6.7 5.000 UNL
12 NH3+ C7H15N2O6 -6.7 4.503 UNL
2 OH C7H13NO7 -6.7 5.812 UNL
18 H C7H13NO6 -6.8 0.000 UNL
19 H C7H13NO6 -6.8 4.198 UNL
1 OH C7H13NO7 -6.8 4.451 UNL
11 NH3+ C7H15N2O6 -6.9 1.497 UNL
10 NH3+ C7H15N2O6 -7.0 2.854 UNL
42 COO C8H13NO8 -7.0 5.551 UNL
41 COO C8H13NO8 -7.0 3.292 UNL
40 COO C8H13NO8 -7.1 5.455 UNL
39 COO C8H13NO8 -7.2 5.721 UNL
0 OH C7H13NO7 -7.2 0.000 UNL
9 NH3+ C7H15N2O6 -7.3 0.000 UNL
35 Ph C13H17NO6 -7.4 7.531 UNL
38 COO C8H13NO8 -7.4 2.924 UNL
34 Ph C13H17NO6 -7.5 6.401 UNL
37 COO C8H13NO8 -7.5 3.078 UNL
32 Ph C13H17NO6 -7.6 4.294 UNL
33 Ph C13H17NO6 -7.6 6.868 UNL
31 Ph C13H17NO6 -7.6 6.748 UNL
30 Ph C13H17NO6 -7.6 5.032 UNL
29 Ph C13H17NO6 -7.7 8.996 UNL
36 COO C8H13NO8 -7.9 0.000 UNL
28 Ph C13H17NO6 -7.9 6.266 UNL
27 Ph C13H17NO6 -8.3 0.000 UNL

6. Анализ докинга и сохранение результатов

In [47]:
for i,r in enumerate(res):
    hbs = oddt.interactions.hbonds(prot,r)
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)
In [48]:
for i,r in enumerate(res):
    r.write(filename='r%s.pdb' % i, format='pdb')

7. Визуализация
Критерий выбора для визуализации - наименьшее значение афинности по каждому радикалу.

In [71]:
import __main__
__main__.pymol_argv = ['pymol','-qc'] # Pymol: quiet and no GUI
from time import sleep
import time 
import pymol
pymol.finish_launching()

defaultImage = '/tmp/pymolimg.png'
def prepareImage(width=600, height=600, sleep=2, filename=defaultImage):
    ## To save the rendered image
    pymol.cmd.ray(width, height)
    pymol.cmd.png('/tmp/pymolimg.png')
    time.sleep(sleep)
In [75]:
#OH
pymol.cmd.reinitialize()
pymol.cmd.load('new_prot.pdb')
pymol.cmd.load('r8.pdb')
pymol.cmd.show('sticks','r8')
pymol.cmd.show('cartoon','new_prot')
pymol.cmd.hide('lines','new_prot')
pymol.cmd.orient()
prepareImage()
Image(defaultImage)
Out[75]:
In [78]:
#H
pymol.cmd.reinitialize()
pymol.cmd.load('new_prot.pdb')
pymol.cmd.load('r26.pdb')
pymol.cmd.show('sticks','r26')
pymol.cmd.show('cartoon','new_prot')
pymol.cmd.hide('lines','new_prot')
pymol.cmd.orient()
prepareImage()
Image(defaultImage)
Out[78]:
In [79]:
#NH3
pymol.cmd.reinitialize()
pymol.cmd.load('new_prot.pdb')
pymol.cmd.load('r17.pdb')
pymol.cmd.show('sticks','r17')
pymol.cmd.show('cartoon','new_prot')
pymol.cmd.hide('lines','new_prot')
pymol.cmd.orient()
prepareImage()
Image(defaultImage)
Out[79]:
In [80]:
#COO
pymol.cmd.reinitialize()
pymol.cmd.load('new_prot.pdb')
pymol.cmd.load('r43.pdb')
pymol.cmd.show('sticks','r43')
pymol.cmd.show('cartoon','new_prot')
pymol.cmd.hide('lines','new_prot')
pymol.cmd.orient()
prepareImage()
Image(defaultImage)
Out[80]:
In [70]:
#Ph
pymol.cmd.reinitialize()
pymol.cmd.load('new_prot.pdb')
pymol.cmd.load('r35.pdb')
pymol.cmd.show('sticks','r35')
pymol.cmd.show('cartoon','new_prot')
pymol.cmd.hide('lines','new_prot')
pymol.cmd.orient()
prepareImage()
Image(defaultImage)
Out[70]:

ODDT
Судя по всему, мы могли бы проделать весь Example 1: filtering, docking and re-scoring workflow, чтобы достичь нашей цели
1) score и rescore докинга с помощью RF-Score and NNScore 2) предсказывать афинность разными методами: случайным лесом, множественной линейной регрессией, PLS регрессией и нейронной сетью