Source code for treebuild.types

#! /usr/bin/env python
#
# Copyright (C) 2016 Jing Lu <ajingnk@gmail.com>
# License: Apache

# -*- coding: utf-8 -*-

# pylint: disable=too-few-public-methods
import math
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.AtomPairs import Pairs

from .model import SMILE_COLUMNNAME, POTENCY


class FingerPrintType:
    """
    Descriptor for one kind of molecular fingerprint.

    Groups a display name, the callable that produces the fingerprint and a
    free-text description.
    """

    def __init__(self, name, fp_func, metadata):
        """
        Record the three pieces that describe a fingerprint type.

        :param name: name of the fingerprint
        :param fp_func: the callable used to generate a fingerprint
        :param metadata: string documenting the fingerprint
        :return:
        """
        self.name, self.fp_func, self.metadata = name, fp_func, metadata

    def to_dict(self):
        """
        Summarise this fingerprint type; ``fp_func`` is not part of the result.

        :return: dictionary holding the ``name`` and ``metadata`` entries
        """
        summary = dict(name=self.name)
        summary["metadata"] = self.metadata
        return summary
class PropertyType:
    """
    representing biological or chemical properties
    """

    def __init__(self, name, metadata, transfunc=None, colname=None):
        """
        :param name: a string name of property
        :param metadata: a string with property meaning
        :param transfunc: optional function used to generate the property
            when it is not already present in the molecule dictionary
        :param colname: optional name of the input-file column holding the
            property
        :return:
        """
        self.name = name
        self.metadata = metadata
        # gen_property calls this as transfunc(mol_dict): one argument, the
        # molecule dictionary.
        self.transfunc = transfunc
        self.colname = colname

    def set_col_name(self, col_name):
        """
        Set the property name from the input file

        :param col_name: original column name in the input file
        :return:
        """
        self.colname = col_name

    def gen_property(self, mol_dict=None):
        """
        Generate the value of this property for one molecule.

        Lookup order: the value stored under ``self.colname``, then the value
        stored under ``self.name``, and finally the result of
        ``self.transfunc(mol_dict)``.

        :param mol_dict: mapping with the information about the molecule
        :return: the stored or generated value for this property
        :raises Exception: if ``mol_dict`` is None, or if the value is not in
            ``mol_dict`` and no transformation function was supplied
        """
        # Check for None first: the membership tests below would otherwise
        # fail with a TypeError when the default argument is used, hiding the
        # explanatory error raised at the end.
        if mol_dict is not None:
            if self.colname in mol_dict:
                return mol_dict[self.colname]
            if self.name in mol_dict:
                return mol_dict[self.name]
            if self.transfunc is not None:
                return self.transfunc(mol_dict)
        raise Exception("please provide the transformation function and molecule information for " + str(self))

    def to_dict(self):
        """
        Show the information

        :return: dictionary with the ``name`` and ``metadata`` entries
        """
        return {"name": self.name, "metadata": self.metadata}

    def __str__(self):
        """Return the property name."""
        return self.name
# property functions
def _lig_eff(mol_dict):
    """
    Compute the ligand efficiency of one molecule.

    The value is ``1.37 * pIC50 / number_of_heavy_atoms`` rounded to five
    decimals. When the ``POTENCY`` key is present, pIC50 is derived as
    ``9 - log10(IC50)``; otherwise the ``"pIC50"`` entry is used directly.

    :param mol_dict: mapping holding the SMILES string under
        ``SMILE_COLUMNNAME`` and either a ``POTENCY`` or a ``"pIC50"`` entry
    :return: ligand efficiency rounded to five decimals
    :raises Exception: if neither ``POTENCY`` nor ``"pIC50"`` is available
    """
    smile = mol_dict[SMILE_COLUMNNAME]
    mol = Chem.MolFromSmiles(smile)
    num_heavy = mol.GetNumHeavyAtoms()
    if POTENCY in mol_dict:
        # Coerce to float like _pic50 does, so values that arrive as strings
        # are handled the same way by both functions.
        potency = float(mol_dict[POTENCY])
        return round(1.37 * (9 - math.log10(potency)) / num_heavy, 5)
    elif "pIC50" in mol_dict:
        pic50_value = float(mol_dict["pIC50"])
        return round(1.37 * pic50_value / num_heavy, 5)
    else:
        raise Exception("Cannot calculate ligand efficiency, please change your input file column name to IC50 or pIC50.")


def _slogp(mol_dict):
    """
    Compute SLogP for one molecule.

    :param mol_dict: mapping holding the SMILES string under
        ``SMILE_COLUMNNAME``
    :return: first Crippen descriptor of the molecule, rounded to five
        decimals
    """
    smile = mol_dict[SMILE_COLUMNNAME]
    mol = Chem.MolFromSmiles(smile)
    return round(Chem.rdMolDescriptors.CalcCrippenDescriptors(mol)[0], 5)


def _pic50(mol_dict):
    """
    Convert the potency of one molecule to pIC50.

    Computed as ``9 - log10(IC50)``; the ``pic50`` metadata in this module
    states that this assumes IC50 is given in nM.

    :param mol_dict: mapping holding the potency under ``POTENCY``
    :return: pIC50 rounded to five decimals
    """
    potency = mol_dict[POTENCY]
    return round(9 - math.log10(float(potency)), 5)


# Default fingerprint types.
ecfp6 = FingerPrintType(
    name="ECFP6",
    fp_func=lambda mol: AllChem.GetMorganFingerprint(mol, 3),
    metadata="Extended Connectivity fingerprint, implemented in "
             "<a href=\"http://www.rdkit.org\">RDKit</a>. <br/>"
             "Parameters used: Radius = 3")

atom_pair = FingerPrintType(
    name="AtomPair",
    fp_func=Pairs.GetAtomPairFingerprint,
    metadata="Atom Pairs as Molecular Features, describe in "
             "R.E. Carhart, D.H. Smith, R. Venkataraghavan; "
             "\"Atom Pairs as Molecular Features in Structure-Activity Studies: "
             "Definition and Applications\" JCICS 25, 64-73 (1985)."
             "implemented in <a href=\"http://www.rdkit.org\">RDKit</a>. <br/>")

# Default property types.
lig_eff = PropertyType(
    name="Lig_Eff",
    metadata="Ligand efficiency. The value is calculated by the function "
             "1.37 * pIC50 / a_heavy",
    transfunc=_lig_eff)

# NOTE(review): the trailing Carhart et al. citation fragment looks like it
# was copied from the atom_pair entry; the text is left untouched because it
# is emitted at runtime.
slogp = PropertyType(
    name="SLogP",
    metadata="SLogP, the coefficients are a measure of the difference in "
             "solubility of the compound in water and octanol. "
             "describe in S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) "
             "R.E. Carhart, D.H. Smith, R. Venkataraghavan; "
             "\"Atom Pairs as Molecular Features in Structure-Activity Studies:",
    transfunc=_slogp)

# Default activity types.
ic50 = PropertyType(
    name="IC50",
    colname="IC50",
    metadata="The half maximal inhibitory concentration (IC50) is a measure of "
             "the effectiveness of a substance in inhibiting a specific "
             "biological or biochemical function.")

pic50 = PropertyType(
    name="pIC50",
    metadata="This number assumes IC50 in nM unit, so it is calculated by "
             "9 - log(IC50). Please change your data or the code to make it "
             "appropriate.",
    transfunc=_pic50)

# External databases: a display name plus a link prefix.
bindingdb = {"name": "BindingDB", "link": "https://www.bindingdb.org/bind/chemsearch/marvin/MolStructure.jsp?monomerid="}
chebi = {"name": "CHEBI", "link": "https://www.ebi.ac.uk/chebi/searchId.do?chebiId="}
pubchem = {"name": "PubChem", "link": "https://pubchem.ncbi.nlm.nih.gov/compound/"}

DEFAULT_FINGERPRINT_TYPES = [ecfp6, atom_pair]
DEFAULT_ACTIVITY_TYPES = [ic50, pic50]
DEFAULT_PROPERTY_TYPES = [lig_eff, slogp]
# NOTE(review): chebi is defined above but is not part of the defaults.
DEFAULT_EXTERNAL = [bindingdb, pubchem]