Source code for molearn.scoring.ramachandran_score
import numpy as np
from copy import deepcopy
from multiprocessing import get_context
from scipy.spatial.distance import cdist
from iotbx.data_manager import DataManager
from mmtbx.validation.ramalyze import ramalyze
from scitbx.array_family import flex
from ..utils import random_string
import os
[docs]
class Ramachandran_Score:
'''
This class contains methods that use iotbx/mmtbx to calulate the quality of phi and psi values in a protein.
'''
def __init__(self, mol, threshold=1e-3):
'''
:param biobox.Molecule mol: One example frame to gain access to the topology. Mol will also be used to save a temporary pdb file that will be reloaded to create the initial iotbx Model.
:param float threshold: (default: 1e-3) Threshold used to determine similarity between biobox.molecule coordinates and iotbx model coordinates. Determine that iotbx model was created successfully.
'''
tmp_file = f'rama_tmp{random_string()}.pdb'
mol.write_pdb(tmp_file, split_struc=False)
filename = tmp_file
self.mol = mol
self.dm = DataManager(datatypes=['model'])
self.dm.process_model_file(filename)
self.model = self.dm.get_model(filename)
self.score = ramalyze(self.model.get_hierarchy()) # get score to see if this works
self.shape = self.model.get_sites_cart().as_numpy_array().shape
# tests
x = self.mol.coordinates[0]
m = self.model.get_sites_cart().as_numpy_array()
assert m.shape == x.shape
self.idxs = np.where(cdist(m, x)<threshold)[1]
assert self.idxs.shape[0] == m.shape[0]
assert not np.any(((m-x[self.idxs])>threshold))
os.remove(tmp_file)
[docs]
def get_score(self, coords, as_ratio=False):
'''
Given coords (corresponding to self.mol) will calculate Ramachandran scores using cctbux ramalyze module
Returns the counts of number of torsion angles that fall within favored, allowed, and outlier regions and finally the total number of torsion angles analysed.
:param numpy.ndarray coords: shape (N, 3)
:returns: (favored, allowed, outliers, total)
:rtype: tuple of ints
'''
assert coords.shape == self.shape
self.model.set_sites_cart(flex.vec3_double(coords[self.idxs].astype(np.double)))
self.score = ramalyze(self.model.get_hierarchy())
nf = self.score.n_favored
na = self.score.n_allowed
no = self.score.n_outliers
nt = self.score.n_total
if as_ratio:
return nf/nt, na/nt, no/nt
else:
return nf, na, no, nt
def set_global_score(score, kwargs):
'''
make score a global variable
This is used when initializing a multiprocessing process
'''
global worker_ramachandran_score
worker_ramachandran_score = score(**kwargs) # mol = mol, data_dir=data_dir, **kwargs)
def process_ramachandran(coords, kwargs):
'''
ramachandran worker
Worker function for multiprocessing class
'''
return worker_ramachandran_score.get_score(coords, **kwargs)
[docs]
class Parallel_Ramachandran_Score:
'''
A multiprocessing class to get Ramachandran scores.
A typical use case would looke like::
score_class = Parallel_Ramachandran_Score(mol, **kwargs)
results = []
for frame in coordinates_array:
results.append(score_class.get_score(frame))
# Ramachandran scores will be calculated asynchronously in background
...
# to retrieve the results
results = np.array([r.get() for r in results])
favored = results[:,0]
allowed = results[:,1]
outliers = results[:,2]
total = results[:,3]
'''
def __init__(self, mol, processes=-1):
'''
:param biobox.Molecule mol: biobox melucel containing one example fram of the protein to be analysed. This will be passed to Ramachandran_Score instances in each thread.
:param int processes: (default: -1) Number of processes argument to pass to multiprocessing.pool. This controls the number of therads created.
'''
# set a number of processes as user desires, capped on number of CPUs
if processes > 0:
processes = min(processes, os.cpu_count())
else:
processes = os.cpu_count()
self.mol = deepcopy(mol)
score = Ramachandran_Score
ctx = get_context('spawn')
self.pool = ctx.Pool(processes=processes, initializer=set_global_score,
initargs=(score, dict(mol=mol)))
self.process_function = process_ramachandran
def __reduce__(self):
return (self.__class__, (self.mol,))
[docs]
def get_score(self, coords, **kwargs):
'''
:param coords: # shape (N, 3) numpy array
'''
# is copy necessary?
return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
def _close(self):
if self.pool:
self.pool.close()
self.pool.join()
self.pool = None