# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Parser for PDB files."""
from __future__ import print_function
import warnings
    import numpy
    from Bio import MissingPythonDependencyError
    raise MissingPythonDependencyError(
        "Install NumPy if you want to use the PDB parser.")
from Bio.File import as_handle
from Bio.PDB.PDBExceptions import PDBConstructionException
from Bio.PDB.PDBExceptions import PDBConstructionWarning
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.parse_pdb_header import _parse_pdb_header_list
# If PDB spec says "COLUMNS 18-20" this means line[17:20]
class PDBParser(object):
    Parse a PDB file and return a Structure object.
    def __init__(self, PERMISSIVE=True, get_header=False,
                 structure_builder=None, QUIET=False):
        The PDB parser call a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instanciated by the
        PDBParser object itself, but if the user provides his/her own
        StructureBuilder object, the latter is used instead.
        o PERMISSIVE - Evaluated as a Boolean. If false, exceptions in
        constructing the SMCRA data structure are fatal. If true (DEFAULT),
        the exceptions are caught, but some residues or atoms will be missing.
        o structure_builder - an optional user implemented StructureBuilder class.
        o QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
        the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
        These warnings might be indicative of problems in the PDB file!
        if structure_builder is not None:
            self.structure_builder = structure_builder
            self.structure_builder = StructureBuilder()
        self.header = None
        self.trailer = None
        self.line_counter = 0
        self.PERMISSIVE = bool(PERMISSIVE)
        self.QUIET = bool(QUIET)
    # Public methods
    def get_structure(self, id, file):
        """Return the structure.
        o id - string, the id that will be used for the structure
        o file - name of the PDB file OR an open filehandle
        if self.QUIET:
            warning_list = warnings.filters[:]
            warnings.filterwarnings("ignore", category=PDBConstructionWarning)
        self.header = None
        self.trailer = None
        # Make a StructureBuilder instance (pass id of structure as parameter)
        with as_handle(file) as handle:
        # Return the Structure instance
        structure = self.structure_builder.get_structure()
        if self.QUIET:
            warnings.filters = warning_list
        return structure
    def get_header(self):
        "Return the header."
        return self.header
    def get_trailer(self):
        "Return the trailer."
        return self.trailer
    # Private methods
    def _parse(self, header_coords_trailer):
        "Parse the PDB file."
        # Extract the header; return the rest of the file
        self.header, coords_trailer = self._get_header(header_coords_trailer)
        # Parse the atomic data; return the PDB file trailer
        self.trailer = self._parse_coordinates(coords_trailer)
    def _get_header(self, header_coords_trailer):
        "Get the header of the PDB file, return the rest."
        structure_builder = self.structure_builder
        i = 0
        for i in range(0, len(header_coords_trailer)):
            structure_builder.set_line_counter(i + 1)
            line = header_coords_trailer[i]
            record_type = line[0:6]
            if record_type == "ATOM  " or record_type == "HETATM" or record_type == "MODEL ":
        header = header_coords_trailer[0:i]
        # Return the rest of the coords+trailer for further processing
        self.line_counter = i
        coords_trailer = header_coords_trailer[i:]
        header_dict = _parse_pdb_header_list(header)
        return header_dict, coords_trailer
    def _parse_coordinates(self, coords_trailer):
        "Parse the atomic data in the PDB file."
        local_line_counter = 0
        structure_builder = self.structure_builder
        current_model_id = 0
        # Flag we have an open model
        model_open = 0
        current_chain_id = None
        current_segid = None
        current_residue_id = None
        current_resname = None
        for i in range(0, len(coords_trailer)):
            line = coords_trailer[i]
            record_type = line[0:6]
            global_line_counter = self.line_counter + local_line_counter + 1
            if record_type == "ATOM  " or record_type == "HETATM":
                # Initialize the Model - there was no explicit MODEL record
                if not model_open:
                    current_model_id += 1
                    model_open = 1
                fullname = line[12:16]
                # get rid of whitespace in atom names
                split_list = fullname.split()
                if len(split_list) != 1:
                    # atom name has internal spaces, e.g. " N B ", so
                    # we do not strip spaces
                    name = fullname
                    # atom name is like " CA ", so we can strip spaces
                    name = split_list[0]
                altloc = line[16]
                resname = line[17:20]
                chainid = line[21]
                    serial_number = int(line[6:11])
                    serial_number = 0
                resseq = int(line[22:26].split()[0])  # sequence identifier
                icode = line[26]  # insertion code
                if record_type == "HETATM":  # hetero atom flag
                    if resname == "HOH" or resname == "WAT":
                        hetero_flag = "W"
                        hetero_flag = "H"
                    hetero_flag = " "
                residue_id = (hetero_flag, resseq, icode)
                # atomic coordinates
                    x = float(line[30:38])
                    y = float(line[38:46])
                    z = float(line[46:54])
                    # Should we allow parsing to continue in permissive mode?
                    # If so, what coordinates should we default to?  Easier to abort!
                    raise PDBConstructionException("Invalid or missing coordinate(s) at line %i."
                                                   % global_line_counter)
                coord = numpy.array((x, y, z), "f")
                # occupancy & B factor
                    occupancy = float(line[54:60])
                    self._handle_PDB_exception("Invalid or missing occupancy",
                    occupancy = None # Rather than arbitrary zero or one
                    bfactor = float(line[60:66])
                    self._handle_PDB_exception("Invalid or missing B factor",
                    bfactor = 0.0  # The PDB use a default of zero if the data is missing
                segid = line[72:76]
                element = line[76:78].strip()
                if current_segid != segid:
                    current_segid = segid
                if current_chain_id != chainid:
                    current_chain_id = chainid
                    current_residue_id = residue_id
                    current_resname = resname
                        structure_builder.init_residue(resname, hetero_flag, resseq, icode)
                    except PDBConstructionException as message:
                        self._handle_PDB_exception(message, global_line_counter)
                elif current_residue_id != residue_id or current_resname != resname:
                    current_residue_id = residue_id
                    current_resname = resname
                        structure_builder.init_residue(resname, hetero_flag, resseq, icode)
                    except PDBConstructionException as message:
                        self._handle_PDB_exception(message, global_line_counter)
                # init atom
                    structure_builder.init_atom(name, coord, bfactor, occupancy, altloc,
                                                fullname, serial_number, element)
                except PDBConstructionException as message:
                    self._handle_PDB_exception(message, global_line_counter)
            elif record_type == "ANISOU":
                anisou = [float(x) for x in (line[28:35], line[35:42], line[43:49],
                                             line[49:56], line[56:63], line[63:70])]
                # U's are scaled by 10^4
                anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f")
            elif record_type == "MODEL ":
                    serial_num = int(line[10:14])
                    self._handle_PDB_exception("Invalid or missing model serial number",
                    serial_num = 0
                structure_builder.init_model(current_model_id, serial_num)
                current_model_id += 1
                model_open = 1
                current_chain_id = None
                current_residue_id = None
            elif record_type == "END   " or record_type == "CONECT":
                # End of atomic data, return the trailer
                self.line_counter += local_line_counter
                return coords_trailer[local_line_counter:]
            elif record_type == "ENDMDL":
                model_open = 0
                current_chain_id = None
                current_residue_id = None
            elif record_type == "SIGUIJ":
                # standard deviation of anisotropic B factor
                siguij = [float(x) for x in (line[28:35], line[35:42], line[42:49],
                                             line[49:56], line[56:63], line[63:70])]
                # U sigma's are scaled by 10^4
                siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f")
            elif record_type == "SIGATM":
                # standard deviation of atomic positions
                sigatm = [float(x) for x in (line[30:38], line[38:45], line[46:54],
                                             line[54:60], line[60:66])]
                sigatm_array = numpy.array(sigatm, "f")
            local_line_counter += 1
        # EOF (does not end in END or CONECT)
        self.line_counter = self.line_counter + local_line_counter
        return []
    def _handle_PDB_exception(self, message, line_counter):
        This method catches an exception that occurs in the StructureBuilder
        object (if PERMISSIVE), or raises it again, this time adding the
        PDB line number to the error message.
        message = "%s at line %i." % (message, line_counter)
        if self.PERMISSIVE:
            # just print a warning - some residues/atoms may be missing
            warnings.warn("PDBConstructionException: %s\n"
                          "Exception ignored.\n"
                          "Some atoms or residues may be missing in the data structure."
                          % message, PDBConstructionWarning)
            # exceptions are fatal - raise again with new message (including line nr)
            raise PDBConstructionException(message)
if __name__ == "__main__":
    import sys
    p = PDBParser(PERMISSIVE=True)
    filename = sys.argv[1]
    s = p.get_structure("scr", filename)
    for m in s:
        p = m.get_parent()
        assert(p is s)
        for c in m:
            p = c.get_parent()
            assert(p is m)
            for r in c:
                p = r.get_parent()
                assert(p is c)
                for a in r:
                    p = a.get_parent()
                    if not p is r:
                        print("%s %s" % (p, r))