# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
 
"""mmCIF parser"""
 
from __future__ import print_function
 
from string import ascii_letters
 
import numpy
 
from Bio._py3k import range
 
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
 
 
class MMCIFParser(object):
    def get_structure(self, structure_id, filename):
        self._mmcif_dict=MMCIF2Dict(filename)
        self._structure_builder=StructureBuilder()
        self._build_structure(structure_id)
        return self._structure_builder.get_structure()
 
    def _build_structure(self, structure_id):
        mmcif_dict=self._mmcif_dict
        atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
        residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
        try:
            element_list = mmcif_dict["_atom_site.type_symbol"]
        except KeyError:
            element_list = None
        seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
        chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
        x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
        y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
        z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
        alt_list=mmcif_dict["_atom_site.label_alt_id"]
        b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
        occupancy_list=mmcif_dict["_atom_site.occupancy"]
        fieldname_list=mmcif_dict["_atom_site.group_PDB"]
        try:
            serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
        except KeyError:
            # No model number column
            serial_list = None
        except ValueError:
            # Invalid model number (malformed file)
            raise PDBConstructionException("Invalid model number")
        try:
            aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
            aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
            aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"]
            aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"]
            aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"]
            aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"]
            aniso_flag=1
        except KeyError:
            # no anisotropic B factors
            aniso_flag=0
        # if auth_seq_id is present, we use this.
        # Otherwise label_seq_id is used.
        if "_atom_site.auth_seq_id" in mmcif_dict:
            seq_id_list=mmcif_dict["_atom_site.auth_seq_id"]
        else:
            seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
        # Now loop over atoms and build the structure
        current_chain_id=None
        current_residue_id=None
        structure_builder=self._structure_builder
        structure_builder.init_structure(structure_id)
        structure_builder.init_seg(" ")
        # Historically, Biopython PDB parser uses model_id to mean array index
        # so serial_id means the Model ID specified in the file
        current_model_id = 0
        current_serial_id = 0
        for i in range(0, len(atom_id_list)):
            x=x_list[i]
            y=y_list[i]
            z=z_list[i]
            resname=residue_id_list[i]
            chainid=chain_id_list[i]
            altloc=alt_list[i]
            if altloc==".":
                altloc=" "
            resseq=seq_id_list[i]
            name=atom_id_list[i]
            # occupancy & B factor
            try:
                tempfactor=float(b_factor_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing B factor")
            try:
                occupancy=float(occupancy_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing occupancy")
            fieldname=fieldname_list[i]
            if fieldname=="HETATM":
                hetatm_flag="H"
            else:
                hetatm_flag=" "
            if serial_list is not None:
                # model column exists; use it
                serial_id = serial_list[i]
                if current_serial_id != serial_id:
                    # if serial changes, update it and start new model
                    current_serial_id = serial_id
                    structure_builder.init_model(current_model_id, current_serial_id)
                    current_model_id += 1
            else:
                # no explicit model column; initialize single model
                structure_builder.init_model(current_model_id)
            if current_chain_id!=chainid:
                current_chain_id=chainid
                structure_builder.init_chain(current_chain_id)
                current_residue_id=resseq
                icode, int_resseq=self._get_icode(resseq)
                structure_builder.init_residue(resname, hetatm_flag, int_resseq,
                    icode)
            elif current_residue_id!=resseq:
                current_residue_id=resseq
                icode, int_resseq=self._get_icode(resseq)
                structure_builder.init_residue(resname, hetatm_flag, int_resseq,
                    icode)
            coord=numpy.array((x, y, z), 'f')
            element = element_list[i] if element_list else None
            structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
                name, element=element)
            if aniso_flag==1:
                u=(aniso_u11[i], aniso_u12[i], aniso_u13[i],
                    aniso_u22[i], aniso_u23[i], aniso_u33[i])
                mapped_anisou = [float(x) for x in u]
                anisou_array=numpy.array(mapped_anisou, 'f')
                structure_builder.set_anisou(anisou_array)
        # Now try to set the cell
        try:
            a=float(mmcif_dict["_cell.length_a"])
            b=float(mmcif_dict["_cell.length_b"])
            c=float(mmcif_dict["_cell.length_c"])
            alpha=float(mmcif_dict["_cell.angle_alpha"])
            beta=float(mmcif_dict["_cell.angle_beta"])
            gamma=float(mmcif_dict["_cell.angle_gamma"])
            cell=numpy.array((a, b, c, alpha, beta, gamma), 'f')
            spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"]
            spacegroup=spacegroup[1:-1] # get rid of quotes!!
            if spacegroup is None:
                raise Exception
            structure_builder.set_symmetry(spacegroup, cell)
        except:
            pass    # no cell found, so just ignore
 
    def _get_icode(self, resseq):
        """Tries to return the icode. In MMCIF files this is just part of
        resseq! In PDB files, it's a separate field."""
        last_resseq_char=resseq[-1]
        if last_resseq_char in ascii_letters:
            icode=last_resseq_char
            int_resseq=int(resseq[0:-1])
        else:
            icode=" "
            int_resseq=int(resseq)
        return icode, int_resseq
 
 
if __name__=="__main__":
    import sys
 
    if len(sys.argv) != 2:
        print("Usage: python MMCIFparser.py filename")
        raise SystemExit
    filename=sys.argv[1]
 
    p=MMCIFParser()
 
    structure=p.get_structure("test", filename)
 
    for model in structure.get_list():
        print(model)
        for chain in model.get_list():
            print(chain)
            print("Found %d residues." % len(chain.get_list()))