In this task, we'll perform docking of ~~ligands of 1lmp~~ some apparently random ligands (namely, hydroxybenzene, 1,4-Dihydroxybenzene and 3,5-Dihydroxybiphenyl), to the previously generated protein structure of P00705.fasta.

from __future__ import division

import numpy as np
import copy

# Отображение структур
import IPython.display
import ipywidgets
from IPython.display import display,display_svg,SVG,Image

# Open Drug Discovery Toolkit
import oddt
import oddt.docking
import oddt.interactions

# Органика
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole

import pmx # Модуль для манипулирования pdb

pdb=pmx.Model('../Task6/ligand_enabled.B99990001.pdb')

for r in pdb.residues[-15:]:
    print r #посмотрим остатки

<Molecule: id = 136 name = THR chain_id =    natoms = 7>
<Molecule: id = 137 name = ASP chain_id =    natoms = 8>
<Molecule: id = 138 name = VAL chain_id =    natoms = 7>
<Molecule: id = 139 name = SER chain_id =    natoms = 6>
<Molecule: id = 140 name = LYS chain_id =    natoms = 9>
<Molecule: id = 141 name = TRP chain_id =    natoms = 14>
<Molecule: id = 142 name = ILE chain_id =    natoms = 8>
<Molecule: id = 143 name = ARG chain_id =    natoms = 11>
<Molecule: id = 144 name = GLY chain_id =    natoms = 4>
<Molecule: id = 145 name = CYS chain_id =    natoms = 6>
<Molecule: id = 146 name = ARG chain_id =    natoms = 11>
<Molecule: id = 147 name = LEU chain_id =    natoms = 9>
<Molecule: id = 148 name = NAG chain_id =    natoms = 14>
<Molecule: id = 149 name = NAG chain_id =    natoms = 14>
<Molecule: id = 150 name = NDG chain_id =    natoms = 15>

# there is a Model.fetch_residues() function, but it is woefully
# undocumented, and using magic numbers to split the protein is
# distasteful, so there.

protein_atoms = filter(lambda a: a.molecule.is_protein_residue(), pdb.atoms)
ligand_atoms = filter(lambda a: not a.molecule.is_protein_residue(), pdb.atoms)

newpdb = pmx.Model(atoms = protein_atoms)
lig = pmx.Model(atoms = ligand_atoms)
print(len(newpdb.residues), len(lig.residues))

(147, 3)

Geometric center of ligand:

lig_center = sum((np.array(a.x) for a in lig.atoms))/len(lig.atoms)
lig_center

array([ 47.91416279,  40.5275814 ,  26.00883721])

newpdb.writePDB(fname="ligandless.pdb")

prot = oddt.toolkit.readfile('pdb','ligandless.pdb').next()

prot.OBMol.AddPolarHydrogens()
prot.OBMol.AutomaticPartialCharge()

# The flying duck?
# print('is it the first mol in 1lmp is protein?',prot.protein,':) and MW of this mol is:', prot.molwt)
print('MW of the mol: %f' % prot.molwt)

MW of the mol: 16354.571400

We want to take NAG and replace the first C (that is, CH3) in its' CC(=O)N- group with various other groups, such as:

OH
NH3+
H
Ph/C6H5
COO-

(One possible) Smiles of NAG is CC(=O)NC1C(O)C(O)C(OC1O)CO, from which we construct smiles of alternate ligands:

smiles_common = "C(=O)NC1C(O)C(O)C(OC1O)CO"
smiles_variant = ["O", "[N+]", "", "c1ccccc1", "[O-]"]
smiles = [s+smiles_common for s in smiles_variant]
#smiles = ['OC(=O)NC1C(O)C(O)C(OC1O)CO',
#          '[N+]C(=O)NC1C(O)C(O)C(OC1O)CO',
#          'C(=O)NC1C(O)C(O)C(OC1O)CO',
#          'c1ccccc1C(=O)NC1C(O)C(O)C(OC1O)CO',
#          '[O-]C(=O)C(=O)NC1C(O)C(O)C(OC1O)CO']
mols= []
images =[]

for s in smiles:
    m = oddt.toolkit.readstring('smi', s)
    if not m.OBMol.Has3D(): 
        m.make3D(forcefield='mmff94', steps=150)
        m.removeh()
        m.OBMol.AddPolarHydrogens()

    mols.append(m)
    ###with print m.OBMol.Has3D() was found that:
    ### deep copy needed to keep 3D , write svg make mols flat
    images.append((SVG(copy.deepcopy(m).write('svg'))))

display_svg(*images)

#create docking object
dock_obj= oddt.docking.AutodockVina.autodock_vina(
    protein=prot,size=(20,20,20),center=lig_center,
    executable='/usr/bin/vina',autocleanup=True, num_modes=9)

print dock_obj.tmp_dir
print " ".join(dock_obj.params) # Опишите выдачу

/tmp/autodock_vina_pmj45Q
--center_x 47.9141627907 --center_y 40.5275813953 --center_z 26.0088372093 --size_x 20 --size_y 20 --size_z 20 --cpu 1 --exhaustiveness 8 --num_modes 9 --energy_range 3

In which autodock_vina parameters are described

center_x,y,z and size_x,y,z describe dimensions and position of the docking box — fragment of space the ligand can be moved over in the process of docking;
cpu quite probably limits cpu usage to one core;
exhaustiveness parameter selects a point on the [computation time — probability of finding global minimum] tradeoff curve;
num_modes determines the maximum number of conformations generated;
energy_range defines maximum energy difference between best and worst displayed conformation, effectively filtering conformations with energy above a threshold.

res = dock_obj.dock(mols,prot)

res_sorted = sorted(res, key=lambda x: x.data['vina_affinity'], reverse=True)

#table = []
formulas = set(r.formula for r in res)

#for f in formulas:
#    table.append(next(r for r in res_sorted if r.formula == f))
#table.sort(key=lambda x: x.data['vina_affinity'], reverse=True)
print("%4s%10s%9s%8s%8s") % ("#", "formula", "vina_aff", "rmsd", "name")
for i,r in enumerate(res_sorted):
    hbs = oddt.interactions.hbonds(prot,r)
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)
    print "%4d%10s%9s%8s%8s" \
    %(i,r.formula, r.data['vina_affinity'],
      r.data['vina_rmsd_ub'], r.residues[0].name,
     )

   #   formula vina_aff    rmsd    name
   0 C13H17NO6     -6.5   0.000     UNL
   1 C13H17NO6     -6.2   7.111     UNL
   2 C13H17NO6     -6.2   6.580     UNL
   3 C13H17NO6     -6.2   4.782     UNL
   4 C13H17NO6     -6.1   7.723     UNL
   5 C13H17NO6     -6.1   3.119     UNL
   6 C13H17NO6     -6.0   7.443     UNL
   7 C13H17NO6     -5.8   6.040     UNL
   8 C13H17NO6     -5.8   5.384     UNL
   9  C7H13NO7     -5.7   0.000     UNL
  10 C7H14N2O6     -5.5   0.000     UNL
  11  C7H13NO7     -5.5   0.000     UNL
  12  C7H13NO7     -5.5   3.080     UNL
  13  C7H13NO7     -5.4   5.885     UNL
  14  C7H13NO7     -5.4   3.461     UNL
  15  C7H13NO7     -5.4   5.678     UNL
  16 C7H14N2O6     -5.4   5.312     UNL
  17 C7H14N2O6     -5.4   3.221     UNL
  18 C7H14N2O6     -5.4   7.127     UNL
  19  C7H13NO7     -5.4   5.550     UNL
  20  C7H13NO6     -5.3   0.000     UNL
  21  C7H13NO6     -5.3   4.403     UNL
  22  C7H13NO7     -5.2   6.745     UNL
  23  C7H13NO7     -5.2   4.848     UNL
  24  C7H13NO7     -5.2   3.900     UNL
  25  C7H13NO7     -5.1   5.333     UNL
  26  C7H13NO7     -5.1   5.925     UNL
  27 C7H14N2O6     -5.1   4.764     UNL
  28 C7H14N2O6     -5.0   3.029     UNL
  29 C7H14N2O6     -5.0   5.029     UNL
  30  C7H13NO6     -5.0   3.390     UNL
  31  C7H13NO6     -5.0   4.935     UNL
  32  C7H13NO7     -4.9   6.023     UNL
  33  C7H13NO7     -4.9   8.437     UNL
  34  C7H13NO7     -4.9   3.059     UNL
  35 C7H14N2O6     -4.9   7.898     UNL
  36  C7H13NO7     -4.9   3.646     UNL
  37  C7H13NO7     -4.9   5.494     UNL
  38 C7H14N2O6     -4.8   4.921     UNL
  39  C7H13NO6     -4.8   5.192     UNL
  40  C7H13NO6     -4.8   3.970     UNL
  41  C7H13NO6     -4.8   1.929     UNL
  42  C7H13NO7     -4.7   2.678     UNL
  43  C7H13NO6     -4.6   4.301     UNL
  44  C7H13NO6     -4.5   6.504     UNL

for i,r in enumerate(res_sorted):
    hbs = oddt.interactions.hbonds(prot,r)
    stack= oddt.interactions.pi_stacking(prot,r)
    phob = oddt.interactions.hydrophobic_contacts(prot,r)
    #print(hbs, stack, phob)

for i,r in enumerate(table):
    r.write(filename='r%s.pdb' % i, format='pdb', overwrite=True)

Let's look at the best available ligand.

import nglview
import ipywidgets
from IPython.display import Image

view1 = nglview.show_structure_file("ligandless.pdb")
view1.add_structure(nglview.FileStructure('r0.pdb'))
#view1.clear_representations()

#view1[0].add_representation(repr_type="")
view1[1].add_representation(repr_type="licorice")
view1[1].center_view()
view1

view1.download_image("religanded.png")
Image("religanded.png")