|
|
from Bio.PDB import PDBParser, Superimposer, is_aa, Select, NeighborSearch |
|
|
import tmtools |
|
|
import os |
|
|
import numpy as np |
|
|
import mdtraj as md |
|
|
from Bio.SeqUtils import seq1 |
|
|
|
|
|
import warnings |
|
|
from Bio import BiopythonWarning, SeqIO |
|
|
|
|
|
import difflib |
|
|
import torch |
|
|
|
|
|
|
|
|
# Ignore every warning whose category is BiopythonWarning (no message or
# module pattern is applied, so the filter matches all of them).
warnings.simplefilter('ignore', category=BiopythonWarning)
|
|
|
|
|
def get_chain_from_pdb(pdb_path, chain_id='A'):
    """Return one chain from the first model of a PDB file.

    Args:
        pdb_path: File argument handed to ``PDBParser.get_structure``.
        chain_id: Identifier of the wanted chain; defaults to ``'A'``.

    Returns:
        The first chain of model 0 whose ``id`` equals ``chain_id``, or
        ``None`` when no chain matches.
    """
    first_model = PDBParser().get_structure('X', pdb_path)[0]
    matching = (candidate for candidate in first_model if candidate.id == chain_id)
    return next(matching, None)
|
|
|
|
|
def diff_ratio(str1, str2):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of two sequences.

    Args:
        str1: First sequence (typically a string).
        str2: Second sequence (typically a string).

    Returns:
        The value of ``SequenceMatcher.ratio()`` with no junk heuristic
        function: ``1.0`` for identical inputs, ``0.0`` when nothing matches.
    """
    return difflib.SequenceMatcher(isjunk=None, a=str1, b=str2).ratio()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def align_chains(chain1, chain2):
    """Pair up the residues of two chains position by position.

    Both chains are walked in lockstep, stopping at the end of the shorter
    one. A pair is kept only when the residue taken from ``chain1`` is an
    amino acid (``is_aa``) that has a ``CA`` atom; the partner residue from
    ``chain2`` is not checked.

    Args:
        chain1: Chain whose residues decide which positions are kept.
        chain2: Chain supplying the partner residue for every kept position.

    Returns:
        A tuple ``(residues1, residues2)`` of two equally long lists.
    """
    kept_pairs = [
        (res_a, res_b)
        for res_a, res_b in zip(chain1.get_residues(), chain2.get_residues())
        if is_aa(res_a) and res_a.has_id('CA')
    ]
    residues_a = [res_a for res_a, _ in kept_pairs]
    residues_b = [res_b for _, res_b in kept_pairs]
    return residues_a, residues_b
|
|
|
|
|
def get_rmsd(chain1, chain2):
    """Compute the CA RMSD of two chains without and with superposition.

    Args:
        chain1: Chain exposing ``get_atoms()``, or ``None``.
        chain2: Chain exposing ``get_atoms()``, or ``None``.

    Returns:
        ``None`` when either chain is ``None``. Otherwise a tuple
        ``(raw_rmsd, fitted_rmsd)``: the first value is computed directly
        from the CA coordinates as stored, the second is the ``rms``
        reported by ``Superimposer`` after ``set_atoms``.
    """
    if chain1 is None or chain2 is None:
        return None

    fitter = Superimposer()

    # Collect the CA atoms once; they feed both RMSD calculations.
    ca_atoms1 = [atom for atom in chain1.get_atoms() if atom.name == 'CA']
    ca_atoms2 = [atom for atom in chain2.get_atoms() if atom.name == 'CA']
    coords1 = np.array([atom.get_coord() for atom in ca_atoms1])
    coords2 = np.array([atom.get_coord() for atom in ca_atoms2])

    # RMSD of the coordinates exactly as given (no fitting).
    squared_deviation = (coords1 - coords2) ** 2
    raw_rmsd = np.sqrt(np.sum(squared_deviation) / len(coords1))

    # RMSD after superposition, as reported by Superimposer.
    fitter.set_atoms(ca_atoms1, ca_atoms2)
    return raw_rmsd, fitter.rms
|
|
|
|
|
def get_tm(chain1, chain2):
    """Run ``tmtools.tm_align`` on the CA traces of two chains.

    The sequences handed to ``tm_align`` are placeholders made of one ``'A'``
    per CA atom, so only the coordinates carry information.

    Args:
        chain1: Chain exposing ``get_atoms()``.
        chain2: Chain exposing ``get_atoms()``.

    Returns:
        The ``tm_norm_chain2`` attribute of the alignment result.
    """
    def _ca_coords(chain):
        # Coordinates of every atom named 'CA', in iteration order.
        return np.array([atom.get_coord() for atom in chain.get_atoms() if atom.name == 'CA'])

    coords1 = _ca_coords(chain1)
    coords2 = _ca_coords(chain2)
    placeholder_seq1 = 'A' * len(coords1)
    placeholder_seq2 = 'A' * len(coords2)
    alignment = tmtools.tm_align(coords1, coords2, placeholder_seq1, placeholder_seq2)
    return alignment.tm_norm_chain2
|
|
|
|
|
def get_traj_chain(pdb, chain):
    """Load a PDB file with mdtraj and keep only the atoms of one chain.

    The chain's position among the chains of the first Biopython model is
    used as the index in mdtraj's ``chainid`` selection.
    NOTE(review): this assumes Biopython and mdtraj enumerate the chains in
    the same order - confirm for files with unusual chain layouts.

    Args:
        pdb: Path of the PDB file (parsed by both Biopython and mdtraj).
        chain: Chain identifier as reported by Biopython (e.g. ``'A'``).

    Returns:
        The mdtraj trajectory sliced down to the selected chain's atoms.

    Raises:
        KeyError: If ``chain`` is not a chain id of the first model.
    """
    first_model = PDBParser().get_structure('X', pdb)[0]
    index_by_chain_id = {entry.id: position for position, entry in enumerate(first_model)}

    full_traj = md.load(pdb)
    selection = f"chainid {index_by_chain_id[chain]}"
    atom_indices = full_traj.topology.select(selection)
    return full_traj.atom_slice(atom_indices)
|
|
|
|
|
def get_second_stru(pdb, chain):
    """Return the simplified DSSP assignment for one chain of a PDB file.

    Args:
        pdb: Path of the PDB file.
        chain: Chain identifier as reported by Biopython (e.g. ``'A'``).

    Returns:
        The result of ``md.compute_dssp(..., simplified=True)`` for the
        trajectory restricted to the requested chain.

    Raises:
        KeyError: If ``chain`` is not a chain id of the first model.
    """
    # Chain extraction is shared with get_traj_chain rather than duplicated
    # here, so both functions always select the chain the same way.
    chain_traj = get_traj_chain(pdb, chain)
    return md.compute_dssp(chain_traj, simplified=True)
|
|
|
|
|
def get_ss(traj1, traj2):
    """Return the fraction of matching simplified DSSP codes of two trajectories.

    Args:
        traj1: First mdtraj trajectory.
        traj2: Second mdtraj trajectory.

    Returns:
        The mean of the element-wise equality between the two simplified
        DSSP assignments.
    """
    dssp_first = md.compute_dssp(traj1, simplified=True)
    dssp_second = md.compute_dssp(traj2, simplified=True)
    agreement = dssp_first == dssp_second
    return agreement.mean()
|
|
|
|
|
def get_bind_site(pdb, chain_id, cutoff=10.0):
    """Find residues of the other chains that lie close to a given chain.

    Only atoms named ``CA`` are considered on both sides. A residue counts as
    part of the binding site when its CA atom is within ``cutoff`` of the CA
    atom of any residue in chain ``chain_id``.

    Args:
        pdb: Path of the PDB file; only its first model is used.
        chain_id: Identifier of the query chain (e.g. the peptide).
        cutoff: Search radius passed to ``NeighborSearch.search``, in the
            coordinate units of the file. Defaults to ``10.0``.

    Returns:
        A set with the second field of each neighbouring residue's id (the
        residue sequence number). Numbers coming from different chains are
        merged into the same set. The set is empty when the query chain has
        no CA atoms or the structure contains no other CA atoms.

    Raises:
        KeyError: If ``chain_id`` is not present in the first model.
    """
    model = PDBParser().get_structure('X', pdb)[0]
    query_cas = [atom for res in model[chain_id] for atom in res if atom.get_name() == 'CA']
    partner_cas = [
        atom
        for chain in model
        if chain.get_id() != chain_id
        for res in chain
        for atom in res
        if atom.get_name() == 'CA'
    ]

    # NeighborSearch builds an (N, 3) coordinate array and cannot be
    # constructed from an empty atom list, so there is nothing to search.
    if not partner_cas:
        return set()

    search = NeighborSearch(partner_cas)
    return {
        res.get_id()[1]
        for atom in query_cas
        for res in search.search(atom.get_coord(), cutoff, level='R')
    }
|
|
|
|
|
def get_bind_ratio(pdb1, pdb2, chain_id1, chain_id2):
    """Return how much of the second binding site is shared with the first.

    Args:
        pdb1: PDB file of the first complex.
        pdb2: PDB file of the second complex.
        chain_id1: Query chain id within ``pdb1``.
        chain_id2: Query chain id within ``pdb2``.

    Returns:
        The number of binding-site residue numbers common to both structures
        divided by the size of the second structure's binding site.
    """
    site_first = get_bind_site(pdb1, chain_id1)
    site_second = get_bind_site(pdb2, chain_id2)
    shared = site_first & site_second
    # The small epsilon keeps the division defined when site_second is empty.
    return len(shared) / (len(site_second) + 1e-10)
|
|
|
|
|
def get_dihedral(pdb, chain):
    """Load the trajectory of one chain; no dihedral is computed yet.

    NOTE(review): the implementation looks unfinished - the chain trajectory
    is loaded through ``get_traj_chain`` but never used, and the function
    always returns ``None``.

    Args:
        pdb: Path of the PDB file.
        chain: Chain identifier forwarded to ``get_traj_chain``.
    """
    _chain_traj = get_traj_chain(pdb, chain)
    return None
|
|
|
|
|
|
|
|
def get_seq(pdb, chain_id):
    """Return the one-letter sequence of a chain in a PDB file.

    Args:
        pdb: Path of the PDB file; only its first model is used.
        chain_id: Identifier of the chain to read.

    Returns:
        A string with one letter per residue of the chain, as produced by
        ``seq1`` (residue names it does not know map to its undefined code).

    Raises:
        KeyError: If ``chain_id`` is not present in the first model.
    """
    chain = PDBParser().get_structure('X', pdb)[0][chain_id]
    # seq1 cuts its input into consecutive 3-character codes. Force every
    # residue name to exactly three characters so that a short name (e.g. an
    # ion or nucleotide whose name was whitespace-stripped by the parser)
    # cannot shift the reading frame and corrupt the residues after it.
    three_letter_codes = (residue.get_resname().ljust(3)[:3] for residue in chain)
    return seq1("".join(three_letter_codes))
|
|
|
|
|
def get_mpnn_seqs(path):
    """Read a FASTA file and split every sequence into single characters.

    Args:
        path: FASTA file passed to ``SeqIO.parse``.

    Returns:
        A list with one entry per record, each entry being the list of
        characters of that record's sequence, in file order.
    """
    return [list(str(record.seq)) for record in SeqIO.parse(path, "fasta")]
|
|
|
|
|
|