[英]Extracting only the chains that we need from a PDB file

I need to extract specific chains from PDB files (sometimes more than one chain). 我需要从PDB文件中提取特定的链(有时不止一个链)。 See the question "How to extract chains from a PDB file?" 参见问题“如何从PDB文件中提取链?”。 It is the same question, and its "marked" answer addresses my problem. 这是相同的问题,其“已标记”的答案解决了我的问题。 But the code does not work in Python 3; it gives one error after another. 但是该代码在Python 3中不起作用,它一个接一个地报错。 Does anybody know how I can make this work in Python 3? 有人知道我该如何让它在Python 3中运行吗?

Any other code that solves the same kind of problem would also be welcome. 或者任何其他能解决同类问题的代码也可以。

Thank you in advance. 先感谢您。

import os
from Bio import PDB

class ChainSplitter:
    """Write copies of PDB structures that contain only selected chains."""

    def __init__(self, out_dir=None):
        """Create parsing and writing objects, specify output directory.

        :param out_dir: directory that receives the extracted files; when
            ``None`` it defaults to ``chain_PDBs`` inside the current
            working directory
        """
        self.parser = PDB.PDBParser()
        self.writer = PDB.PDBIO()
        if out_dir is None:
            out_dir = os.path.join(os.getcwd(), "chain_PDBs")
        self.out_dir = out_dir

    def make_pdb(self, pdb_path, chain_letters, overwrite=False, struct=None):
        """Create a new PDB file containing only the specified chains.

        Returns the path to the created file. If the output file already
        exists and ``overwrite`` is false, nothing is written and the
        existing path is returned.

        :param pdb_path: full path to the crystal structure; the PDB id is
            taken from characters 3-6 of the file name (``pdbXXXX.ent``)
        :param chain_letters: iterable of chain characters (case insensitive)
        :param overwrite: write over the output file if it exists
        :param struct: already parsed structure to reuse; when ``None`` the
            file at ``pdb_path`` is parsed
        """
        chain_letters = [chain.upper() for chain in chain_letters]

        # Input/output files
        pdb_fn = os.path.basename(pdb_path)
        pdb_id = pdb_fn[3:7]
        out_name = "pdb%s_%s.ent" % (pdb_id, "".join(chain_letters))
        out_path = os.path.join(self.out_dir, out_name)
        print ("OUT PATH:",out_path)
        plural = "s" if (len(chain_letters) > 1) else ""  # for printing

        # Skip PDB generation if the file already exists
        if (not overwrite) and (os.path.isfile(out_path)):
            print("Chain%s %s of '%s' already extracted to '%s'." %
                    (plural, ", ".join(chain_letters), pdb_id, out_name))
            return out_path

        print("Extracting chain%s %s from %s..." % (plural,
                ", ".join(chain_letters),  pdb_fn))

        # Get structure, write new file with only given chains
        if struct is None:
            struct = self.parser.get_structure(pdb_id, pdb_path)
        # The writer must be handed the structure before save() can emit it.
        self.writer.set_structure(struct)
        # Make sure the target directory exists so the first save succeeds.
        os.makedirs(self.out_dir, exist_ok=True)
        self.writer.save(out_path, select=SelectChains(chain_letters))

        return out_path

class SelectChains(PDB.Select):
    """Selection filter that lets only the requested chains through on save."""

    def __init__(self, chain_letters):
        # Chain identifiers that accept_chain() tests membership against.
        self.chain_letters = chain_letters

    def accept_chain(self, chain):
        """Return True when the chain's id is one of the stored letters."""
        chain_id = chain.get_id()
        return chain_id in self.chain_letters

if __name__ == "__main__":
    # Parse PDB ids and desired chains from a text file, download each
    # structure, and create new PDB files holding only the requested chain.
    # Each line of the text file is a 4-character PDB id immediately
    # followed by one chain character.
    import sys

    if len(sys.argv) != 2:
        print ("Usage: $ python %s 'pdb.txt'" % __file__)
        # Stop here; without the argument sys.argv[1] would raise IndexError.
        sys.exit(1)

    pdb_textfn = sys.argv[1]

    pdbList = PDB.PDBList()
    # NOTE: hard-coded output directory; adjust it for your own machine.
    splitter = ChainSplitter("/home/patrick/Desktop/chain_splitting")

    with open(pdb_textfn) as pdb_textfile:
        for line in pdb_textfile:
            entry = line.strip()
            # Skip blank or truncated lines that lack an id plus chain letter.
            if len(entry) < 5:
                continue
            pdb_id = entry[:4].lower()
            chain = entry[4]
            # Ask for the PDB format explicitly: the parser used by
            # ChainSplitter is a PDBParser and cannot read mmCIF files.
            pdb_fn = pdbList.retrieve_pdb_file(pdb_id, file_format="pdb")
            splitter.make_pdb(pdb_fn, chain)

retrieve_pdb_file has the optional parameter file_format. retrieve_pdb_file具有可选参数file_format。 When no file format is specified, the download defaults to mmCIF (.cif) files. 如果未指定文件格式,则默认下载mmCIF(.cif)文件。 Biopython's PDBParser, however, expects a file in PDB format. 但是,Biopython的PDBParser需要PDB格式的文件。

You can change the line to 您可以将行更改为

# Request the PDB format explicitly so the downloaded file can be read by a PDB parser.
pdbList.retrieve_pdb_file(pdb_id, file_format='pdb')

With this change you should get a PDB file, and the rest of the code runs through. 这样您应该会获得一个PDB文件,其余的代码即可顺利运行。

