Source code for TEMPy.protein.prot_rep_biopy

# =============================================================================
#     This file is part of TEMPy.
#
#     TEMPy is a software designed to help the user in the manipulation
#     and analyses of macromolecular assemblies using 3D electron microscopy
#     maps.
#
#     Copyright  2015 Birkbeck College University of London.
#
#     Authors: Maya Topf, Daven Vasishtan, Arun Prasad Pandurangan,
#     Irene Farabella, Agnel-Praveen Joseph, Harpal Sahota
#
#     This software is made available under GPL V3 license
#     http://www.gnu.org/licenses/gpl-3.0.html
#
#
#     Please cite your use of TEMPy in published work:
#
#     Farabella, I., Vasishtan, D., Joseph, A.P., Pandurangan, A.P., Sahota, H.
#     & Topf, M. (2015). J. Appl. Cryst. 48.
#
# =============================================================================

import copy
import math
import random
import sys
import collections

import numpy as np
import gemmi

from TEMPy.protein.mmcif import mmcif_writer
from TEMPy.math.quaternion import Quaternion
import TEMPy.math.vector as Vector


# Useful global constants
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyz"
ATOMIC_MASSES = {"H": 1, "C": 12, "N": 14, "O": 16, "S": 32}
# Taken from http://www.ccdc.cam.ac.uk/products/csd/radii/table.php4
VDW_RADII = {
    "H": 1.09,
    "C": 1.7,
    "N": 1.55,
    "O": 1.52,
    "S": 1.8,
    "P": 1.8,
    "Cl": 1.75,
    "CL": 1.75,
    "Cu": 1.4,
    "CU": 1.4,
}
SEQUENCE_CONSTS = {
    "GLY": "G",
    "ALA": "A",
    "VAL": "V",
    "LEU": "L",
    "ILE": "I",
    "MET": "M",
    "PHE": "F",
    "TRP": "W",
    "PRO": "P",
    "SER": "S",
    "THR": "T",
    "CYS": "C",
    "TYR": "Y",
    "ASN": "N",
    "GLN": "Q",
    "ASP": "D",
    "GLU": "E",
    "LYS": "K",
    "ARG": "R",
    "HIS": "H",
}
ROOT_2_PI = (2 * math.pi) ** 0.5
AA_MASS = {
    "A": 71.0788,
    "R": 156.1875,
    "N": 114.1038,
    "D": 115.0886,
    "C": 103.1388,
    "E": 129.1155,
    "Q": 128.1307,
    "G": 57.0519,
    "H": 137.1411,
    "I": 113.1594,
    "L": 113.1594,
    "K": 128.1741,
    "M": 131.1926,
    "F": 147.1766,
    "P": 97.1167,
    "S": 87.0782,
    "T": 101.1051,
    "W": 186.2132,
    "Y": 163.1760,
    "V": 99.1326,
}


[docs]class Atom:
    """
    A generic atomic class.  Subclass should ensure desired fields are set.
    """

    def __init__(
        self,
        atom_name,
        coords,
        record_name="ATOM",
        serial=None,
        temp_factor=None,
        alt_loc=None,
        icode="",
        charge="",
        element="",
        occ=None,
        res=None,
        model=None,
        chain=None,
        res_no=None,
    ):
        # Set atomic information
        self.atom_name = atom_name
        self.mass = ATOMIC_MASSES.get(atom_name[0], 1.0)
        self.vdw = VDW_RADII.get(atom_name[0], 1.7)
        self.elem = element
        self.charge = charge
        self.temp_fac = temp_factor

        self.alt_loc = alt_loc
        self.icode = icode
        self.occ = occ
        self.serial = serial

        # Set atomic coordinates
        x, y, z = coords
        self.init_x = self.x = float(x)
        self.init_y = self.y = float(y)
        self.init_z = self.z = float(z)

        # Set topological information
        self.res = res
        self.res_no = res_no
        self.chain = chain
        self.model = model

        self.record_name = record_name

    def __repr__(self):
        return (
            f"({self.get_res()} {self.res_no} {self.chain}: "
            + f"{self.x:.3f}, {self.y:.3f}, {self.z:.3f})"
        )

[docs]    def copy(self):
        """
        Return:
            Copy of the Atom instance.
        """
        return copy.deepcopy(self)

[docs]    def get_mass(self):
        """
        Return:
            Atom mass.
        """
        return self.mass

[docs]    def distance_from_init_position(self):
        """
        Return:
            Distance from initial position.
        """
        return (
            (self.x - self.init_x) ** 2
            + (self.y - self.init_y) ** 2
            + (self.z - self.init_z) ** 2
        ) ** 0.5

[docs]    def distance_from_atom(self, atom):
        """
        Return:
            Distance from another atom.
        """
        return (
            (self.x - atom.x) ** 2 + (self.y - atom.y) ** 2 + (self.z - atom.z) ** 2
        ) ** 0.5

[docs]    def reset_position(self):
        """
        Translate atom back in its initial position.

        Return:
            initial position co-ordinates of atom.
        """
        self.x = self.init_x
        self.y = self.init_y
        self.z = self.init_z

[docs]    def change_init_position(self):
        """
        Change initial position co-ordinates of atom to current position.

        Return:
            new initial position co-ordinates of atom.
        """
        self.init_x = self.x
        self.init_y = self.y
        self.init_z = self.z

[docs]    def translate(self, x, y, z):
        """
        Translate the atom.

        Arguments:
            *x, y, z*
                distance in Angstroms in respective directions to move atom.

        Return:
            Translate atom object.
        """
        self.x += x
        self.y += y
        self.z += z

[docs]    def map_grid_position(self, densMap, index=True):
        """
        Arguments:
            *densMap*
                EM map object consisting the 3D grid of density values.
            *index*
                Boolean to return co-ordinates of grid as integer index.
                If false co-ordinates are returned as floats.
                Default: True

        Return:
              The co-ordinates and density value of the grid point in a density
              map closest to this atom.
              Return 0 if atom is outside of map.
        """
        x_origin = densMap.x_origin()
        y_origin = densMap.y_origin()
        z_origin = densMap.z_origin()
        apix = densMap.apix
        x_size = densMap.x_size()
        y_size = densMap.y_size()
        z_size = densMap.z_size()
        if index:
            x_pos = int(round((self.get_x() - x_origin) / apix[0], 0))
            y_pos = int(round((self.get_y() - y_origin) / apix[1], 0))
            z_pos = int(round((self.get_z() - z_origin) / apix[2], 0))
        else:
            x_pos = (self.get_x() - x_origin) / apix[0]
            y_pos = (self.get_y() - y_origin) / apix[1]
            z_pos = (self.get_z() - z_origin) / apix[2]
        if (x_size > x_pos >= 0) and (y_size > y_pos >= 0) and (z_size > z_pos >= 0):
            return x_pos, y_pos, z_pos, self.mass
        else:
            return 0

[docs]    def matrix_transform(self, rot_mat):
        """
        Transform atom using a 3x3 matrix

        Arguments:
            *rot_mat*
                a 3x3 matrix instance.

        Return:
            Transformed position of atom object
        """
        atom_mat = np.array([self.x, self.y, self.z])
        new_pos = rot_mat @ atom_mat
        self.x = float(new_pos[0])
        self.y = float(new_pos[1])
        self.z = float(new_pos[2])

[docs]    def get_pos_vector(self):
        """
        Return:
            Vector instance containing 3D coordinates of the atom.
        """
        return Vector.Vector(self.x, self.y, self.z)

[docs]    def get_pos_mass(self):
        """
        Return:
            An array containing Vector instances containing 3D coordinates of
            the atom and and its corresponding mass.
        """
        return [self.x, self.y, self.z, self.mass]

[docs]    def get_x(self):
        """
        Return:
            x co-ordinate of atom.
        """
        return float(self.x)

[docs]    def get_y(self):
        """
        Return:
            y co-ordinate of atom.
        """
        return float(self.y)

[docs]    def get_z(self):
        """
        Return:
            z co-ordinate of atom.
        """
        return float(self.z)

[docs]    def set_x(self, mod):
        """
        Change the x co-ordinate of an atom based on the argument.

        Arguments:
            *mod*
                float value
        Return:
            new x co-ordinate
        """
        self.x = mod

[docs]    def set_y(self, mod):
        """
        Change the y co-ordinate of an atom based on the argument.

        Arguments:
            *mod*
                float value
        Return:
            new y co-ordinate
        """
        self.y = mod

[docs]    def set_z(self, mod):
        """

        Change the z co-ordinate of an atom based on the argument.

        Arguments:
            *mod*
                float value
        Return:
            new x co-ordinate
        """
        self.z = mod

[docs]    def get_name(self):
        """
        atom name (ie. 'CA' or 'O')

        Return:
            atom name.
        """
        return self.atom_name

[docs]    def get_res(self):
        """

        Return:
            three letter residue code corresponding to the atom (i.e 'ARG').
        """
        return self.res

[docs]    def get_res_no(self):
        """
        Return:
            residue number corresponding to the atom.
        """
        return self.res_no

[docs]    def get_id_no(self):
        """
        Return:
            string of atom serial number.
        """
        return self.serial

    def _writeTerm(self):
        line = ""
        line += "TER".ljust(6)
        line += str(self.serial + 1).rjust(5) + " "
        line += "".center(4)
        line += self.alt_loc.ljust(1)
        line += self.res.ljust(4)
        line += self.chain.ljust(1)
        line += str(self.res_no).rjust(4)
        return line

[docs]    def write_to_PDB(self):
        """
        Writes a PDB ATOM record based in the atom attributes to a file.
        """
        line = ""
        line += self.record_name.ljust(6)
        line += str(self.serial).rjust(5) + " "
        line += self.atom_name.center(4)
        if self.alt_loc == ".":
            line += " "
        else:
            line += self.alt_loc.ljust(1)
        line += self.res.ljust(3) + " "
        line += self.chain.ljust(1)
        line += str(self.res_no).rjust(4)
        if self.icode == "?":
            line += " " + "   "
        else:
            line += str(self.icode).ljust(1) + "   "
        x = "%.3f" % self.x
        y = "%.3f" % self.y
        z = "%.3f" % self.z
        line += x.rjust(8)
        line += y.rjust(8)
        line += z.rjust(8)
        occ = "%.2f" % float(self.occ)
        temp_fac = "%.2f" % float(self.temp_fac)
        line += occ.rjust(6)
        line += temp_fac.rjust(6) + "          "
        line += self.elem.strip().rjust(2)
        line += str(self.charge).strip().rjust(2)
        return line + "\n"

    def rotate_by_quaternion(self, q_param):
        l = [0.0, self.x, self.y, self.z]  # noqa:E741
        atom_quat = Quaternion(l)
        q = Quaternion(q_param)
        q_conjugate = q.conjugate(q)
        resultant_quat = q.multiply_3(atom_quat, q_conjugate, q)
        w, x, y, z = resultant_quat.param
        self.x = float(x)
        self.y = float(y)
        self.z = float(z)

    # added by Scott(SWH)
[docs]    def gridpoints_around_atom(self, densmap, radius=0.0, dx=0, dy=0, dz=0):
        """
        Get the x,y,z box boundaries of a set radius around an atom
        If radius is set, dx, dy, and dz will be calculated using radius,
        if not dx, dy, and dz given will be used.
        Arguments:
            *densmap*
                Map instance containing the grid
            *radius*
                Cutoff radius around atom (Angstroms)
            *dx*
                gridpoint distance along x (in integers)
            *dy*
                gridpoint distance along y (in integers)
            *dz*
                gridpoint distance along z (in integers)
        Return:
            Minimum and maximum grid index along each direction of x, y, z
            e.g. [minx, maxx, miny, maxy, minz, maxz]
        """
        xyz = self.map_grid_position(densmap)
        if not isinstance(xyz, int):
            x, y, z = xyz[:3]
        else:
            raise ValueError("Atom map grid position is out of box boundary.")
        if radius > 0.0:
            dx = int(np.ceil(radius / densmap.apix[0]))
            dy = int(np.ceil(radius / densmap.apix[1]))
            dz = int(np.ceil(radius / densmap.apix[2]))
        if dx == 0 and dy == 0 and dz == 0:
            print("Gridpoint distances are 0 along dx, dy and dz.")
            print("Please set the radius or dx, dy, dz to more than 0.")
            return [0, 0, 0, 0, 0, 0]
        minx = x - dx
        miny = y - dy
        minz = z - dz
        maxx = x + dx
        maxy = y + dy
        maxz = z + dz
        minx = min(max(minx, 0), densmap.x_size() - 1)
        miny = min(max(miny, 0), densmap.y_size() - 1)
        minz = min(max(minz, 0), densmap.z_size() - 1)
        maxx = max(min(maxx, densmap.x_size()), 0)
        maxy = max(min(maxy, densmap.y_size()), 0)
        maxz = max(min(maxz, densmap.z_size()), 0)

        return [minx, maxx, miny, maxy, minz, maxz]


[docs]class gemmiAtom(Atom):
    """
    Class representing an atom as read from a PDB/mmCIF file using gemmi

    gemmi docs here: https://gemmi.readthedocs.io/
    """

    def __init__(
        self,
        chain,
        entity,
        residue,
        atom,
    ):

        # "fixing" some gemmi flags to be writable
        if residue.het_flag == "H":
            record_name = "HETATM"
        else:
            record_name = "ATOM"
        if atom.has_altloc():
            pass
        else:
            atom.altloc = "."
        if residue.seqid.icode == " ":
            icode = "?"
        else:
            icode = residue.seqid.icode

        super().__init__(
            atom.name,
            (atom.pos.x, atom.pos.y, atom.pos.z),
            record_name=record_name,
            serial=atom.serial,
            temp_factor=atom.b_iso,
            alt_loc=atom.altloc,
            icode=icode,
            charge=atom.charge,
            element=atom.element.name,
            occ=atom.occ,
            res=residue.name,
            model=entity.name,
            chain=chain.name,
            res_no=residue.seqid.num,
        )

        # grab some extra variables for writing cif files
        self.asym_id = residue.subchain
        self.model = entity.name
        self.seq_id = residue.label_seq
        self.res_flag = residue.flag
        self.entity_type = residue.entity_type
        self.segment = residue.segment
        self.calc_flag = atom.calc_flag
        self.atom_flag = atom.flag
        self.tls_group_id = atom.tls_group_id
        self.polymer_type = entity.polymer_type
        self.seqid_icode = residue.seqid.icode

[docs]    def get_atom_vals(self):
        """returns list ready for parsing by gemmi for file writing"""
        atom_block = [
            self.record_name,
            self.serial,
            self.elem,
            self.atom_name,
            self.alt_loc,
            self.res,
            self.asym_id,
            self.model,
            self.seq_id,
            self.icode,
            self.x,
            self.y,
            self.z,
            self.occ,
            self.temp_fac,
            self.charge,
            self.res_no,
            self.chain,
        ]
        return atom_block

[docs]    def get_str_atom_vals(self):
        """returns list ready for parsing by gemmi for file writing"""
        atom_block = [
            str(self.record_name),
            str(self.serial),
            str(self.elem),
            str(self.atom_name),
            str(self.alt_loc),
            str(self.res),
            str(self.asym_id),
            str(self.model),
            str(self.seq_id),
            str(self.icode),
            str(round(self.x, 3)),
            str(round(self.y, 3)),
            str(round(self.z, 3)),
            str(self.occ),
            "%.2f" % self.temp_fac,
            str(self.charge),
            str(self.res_no),
            str(self.chain),
        ]
        return atom_block

[docs]    def get_col_labels(self):
        """returns list ready for parsing by gemmi for file writing"""
        col_ids = [
            "group_PDB",
            "id",
            "type_symbol",
            "label_atom_id",
            "label_alt_id",
            "label_comp_id",
            "label_asym_id",
            "label_entity_id",
            "label_seq_id",
            "pdbx_PDB_ins_code",
            "Cartn_x",
            "Cartn_y",
            "Cartn_z",
            "occupancy",
            "B_iso_or_equiv",
            "pdbx_formal_charge",
            "auth_seq_id",
            "auth_asym_id",
        ]
        return col_ids

    def set_tempy_score(self, score_name, score_value):
        self.tempy_scores[score_name] = score_value

    def get_tempy_score(self, score_name):
        return self.tempy_score[score_name]


[docs]class BioPy_Structure:
    """
    A class representing a Structure object.
    """

    def __init__(self, atomList, filename="Unknown", header="", footer=""):
        """
        Initialise using a string of the relevant pdb file name or a numpy
        array of Atom objects.

        Arguments:
            *pdbFileOrList*
                String of pdb file name or array of Atom objects
        """
        self.header = header
        self.footer = footer
        self.filename = filename
        if type(atomList) == np.ndarray:
            self.atomList = atomList[:]
        if type(atomList) == list:
            self.atomList = np.array(atomList)
        self.CoM = self.calculate_centre_of_mass()
        self.initCoM = self.CoM.copy()

    def __getitem__(self, index):
        return self.atomList[index]

    def __len__(self):
        return len(self.atomList)

    def __repr__(self):
        if not self.filename == "Unknown":
            s = "Filename: " + self.filename + "\n"
        else:
            s = ""
        s += "No Of Atoms: " + str(len(self)) + "\n"
        s += "First Atom: " + str(self.atomList[0]) + "\n"
        s += "Last Atom: " + str(self.atomList[-1]) + "\n"
        return s

    def get_pdb_id(self):
        return self.pdb_id

[docs]    def write_to_PDB(self, filename, hetatom=False):
        """
        Write Structure instance to PDB file.

        Arguments:
            *filename*
                output filename.
        """
        self.footer = ""

        if filename[-4:] == ".pdb":
            g = open(filename, "w")
        else:
            g = open(filename + ".pdb", "w")
        header = """EXPDTA    MODEL GENERATE WITH TEMPY
REMARK    MODEL GENERATE WITH TEMPY
MODEL      1
"""
        g.write(header)
        structList = self.split_into_chains()
        hetatmlist = []
        chainlisttot = []
        last_prot_num = 0
        for chain in range(len(structList)):
            chainlist = []
            for x in structList[chain].atomList:
                if x.record_name == "HETATM":
                    hetatmlist.append(x.copy())
                else:
                    chainlist.append(x.copy())
            chainlisttot.append(BioPy_Structure(chainlist))
        for strchain in range(len(chainlisttot)):
            if strchain == 0:
                chainlisttot[strchain].renumber_atoms()
            else:
                start_num = chainlisttot[strchain - 1].atomList[-1].serial
                chainlisttot[strchain].renumber_atoms(start_num=start_num + 2)
            for x in chainlisttot[strchain].atomList:
                g.write(x.write_to_PDB())
                last_prot_num += 1
            line = chainlisttot[strchain].atomList[-1]._writeTerm() + "\n"
            g.write(line)
        if hetatom is True:
            hetstr = BioPy_Structure(hetatmlist)
            hetchain = hetstr.split_into_chains()
            for chain in range(len(hetchain)):
                if chain == 0:
                    hetchain[chain].renumber_atoms(start_num=last_prot_num + 2)
                else:
                    start_num = hetchain[chain - 1].atomList[-1].serial
                    hetchain[chain].renumber_atoms(start_num=start_num + 2)
                for xhet in hetchain[chain].atomList:
                    g.write(xhet.write_to_PDB())
                line = hetchain[chain].atomList[-1]._writeTerm() + "\n"
                g.write(line)
        lineend = ""
        lineend += "ENDMDL".ljust(6) + "\n"
        g.write(lineend)
        g.write(self.footer)
        g.close()

[docs]    def copy(self):
        """
        Return:
            Copy of Structure instance.
        """
        newAtomList = []
        for atom in self.atomList:
            newAtomList.append(atom.copy())
        return BioPy_Structure(newAtomList)

    def get_number_of_atoms(self):
        self.no_of_atoms = len(self)
        return self.no_of_atoms

    def get_model_atoms(self, model_ID):

        return [i for i in self.atomList if i.model == model_ID]

[docs]    def calculate_centre_of_mass(self):
        """
        Return:
            Center of mass of structure as a Vector instance.
        """
        x_CoM = 0.0
        y_CoM = 0.0
        z_CoM = 0.0
        massTotal = 0.0
        for atom in self.atomList:
            x = atom.get_x()
            y = atom.get_y()
            z = atom.get_z()
            m = atom.get_mass()
            x_CoM += x * m
            y_CoM += y * m
            z_CoM += z * m
            massTotal += m
        x_CoM /= massTotal
        y_CoM /= massTotal
        z_CoM /= massTotal
        return Vector.Vector(x_CoM, y_CoM, z_CoM)

[docs]    def translate(self, x, y, z):
        """
        Translate the structure.

        Arguments:
            *x, y, z*
                distance in Angstroms in respective directions to move
                structure.
        Return:
            Translated Structure instance
        """
        for atom in self.atomList:
            atom.translate(x, y, z)
        self.CoM = self.CoM.translate(x, y, z)

[docs]    def rotate_by_axis_angle(
        self,
        x,
        y,
        z,
        turn,
        x_trans=0,
        y_trans=0,
        z_trans=0,
        rad=False,
        com=False,
    ):
        """
        Rotate the Structure instance around its centre.

        Arguments:
            *turn*
                angle (in radians if rad == True, else in degrees) to rotate
                map.
            *x,y,z*
                axis to rotate about, ie. x,y,z =  0,0,1 rotates the structure
                round the xy-plane.
            *x_trans, y_trans, z_trans*
                extra translational movement if required.
            *com*
                centre of mass around which to rotate the structure. If False,
                rotates around centre of mass of structure.
        """
        mat = Vector.axis_angle_to_matrix(x, y, z, turn, rad)

        if not com:
            com = self.CoM.copy()

        newcom = com.matrix_transform(mat)
        offset = com - newcom
        self.matrix_transform(mat)
        self.translate(
            x_trans + offset.x,
            y_trans + offset.y,
            z_trans + offset.z,
        )

[docs]    def rotate_by_euler(
        self,
        x_turn,
        y_turn,
        z_turn,
        x_trans=0,
        y_trans=0,
        z_trans=0,
        rad=False,
        com=False,
    ):
        """
        Rotate this Structure instance around its centre.

        Arguments:
            *x_turn,y_turn,z_turn*
                Euler angles (in radians if rad == True, else in degrees) used
                to rotate structure, in order XYZ.
            *x_trans, y_trans, z_trans*
                extra translational movement if required.
            *com*
                centre of mass around which to rotate the structure. If False,
                rotates around centre of mass of structure.
        """
        mat = Vector.euler_to_matrix(x_turn, y_turn, z_turn, rad)

        if not com:
            com = self.CoM.copy()

        newcom = com.matrix_transform(mat)
        offset = com - newcom
        self.matrix_transform(mat)
        self.translate(x_trans + offset.x, y_trans + offset.y, z_trans + offset.z)

    def rotate_by_quaternion(self, q, com=False):
        if not com:
            com = self.CoM.copy()
        self.translate(-com.x, -com.y, -com.z)
        for atom in self.atomList:
            atom.rotate_by_quaternion(q)
        self.translate(com.x, com.y, com.z)

[docs]    def randomise_position(
        self, max_trans, max_rot, v_grain=30, rad=False, verbose=False
    ):
        """
        Randomise the position of the Structure instance using random rotations
        and translations.

        Arguments:
            *max_trans*
                Maximum translation permitted
            *max_rot*
                Maximum rotation permitted (in degree if rad=False)
            *v_grain*
                Graning Level for the generation of random vetors (default=30)
        Return:
            Transformed position of Structure object
        """
        t_v = Vector.random_vector(-v_grain, v_grain).unit()
        r_v = Vector.random_vector(-v_grain, v_grain).unit()

        if max_trans <= 0:
            t_dist = 0
        else:
            t_dist = random.randrange(max_trans)

        if max_rot <= 0:
            r_ang = 0
        else:
            r_ang = random.randrange(max_rot)
        t_v = t_v.times(t_dist)
        self.rotate_by_axis_angle(
            r_v.x, r_v.y, r_v.z, r_ang, t_v.x, t_v.y, t_v.z, rad=rad
        )
        if verbose:
            print(r_v.x, r_v.y, r_v.z, r_ang, t_v.x, t_v.y, t_v.z)
        return r_v.x, r_v.y, r_v.z, r_ang, t_v.x, t_v.y, t_v.z

[docs]    def matrix_transform(self, matrix):
        """
        Transform Structure using a 3x3 transformation matrix

        Arguments:
            *rot_mat*
                a 3x3 matrix instance.

        Return:
            Transformed position of Structure object
        """
        for atom in self.atomList:
            atom.matrix_transform(matrix)
        self.CoM = self.CoM.matrix_transform(matrix)

[docs]    def reorder_residues(self):
        """
        Order residues in atom list by residue number.
        (NOTE: Does not check for chain information - split by chain first).
        """
        self.atomList = list(self.atomList)
        self.atomList.sort(cmp=lambda x, y: int(x.res_no) - int(y.res_no))
        self.atomList = np.array(self.atomList)

[docs]    def get_residue_indexes(self):
        """Get the atom indexes for each residues from atomList in the
        structure

        Returns:
            residue_indexes: a dictionary containing the index positions
            for all atoms from each residue in the structure.
            Items have format: [list, of, atom, indexes, as, ints]
            Keys have format: (str(chain_label), int(residue_number))
        """

        # as of python 3.7 all dictionaries should preserve insertion order
        residue_indexes = dict()

        for n, atom in enumerate(self.atomList):
            try:
                residue_indexes[atom.chain, atom.res_no].append(n)
            except KeyError:
                residue_indexes[atom.chain, atom.res_no] = [n]

        return residue_indexes

[docs]    def number_of_residues(self):
        """Return the number of residues (and/or nucleotide bases) in the
        model"""
        prev = None
        num_residues = 0

        for atom in self.atomList:
            if (atom.res_no, atom.chain) == prev:
                continue
            else:
                prev = (atom.res_no, atom.chain)
                num_residues += 1

        return num_residues

[docs]    def renumber_residues(self, startRes=1, missingRes=[]):
        """
        Renumber the structure starting from startRes.
        Missing number list to add.

        Arguments:
            *startRes*
                Starting residue number for renumbering
            *missingRes*
                A list of missing residue numbers to add
        """
        resNo = startRes
        currentRes = self.atomList[0].res_no
        for x in self.atomList:
            if x.res_no == currentRes:
                x.res_no = resNo
            else:
                currentRes = x.res_no
                resNo += 1
                while resNo in missingRes:
                    resNo += 1
                x.res_no = resNo

[docs]    def renumber_atoms(self, start_num=1):
        """
        Renumber the atoms in the structure.
        After renumbering the starting atom number will be 1 unless start_num
        """
        for x in range(len(self.atomList)):
            if (x + 1) < 99999:
                self.atomList[x].serial = x + start_num
            else:
                self.atomList[x].serial = "*****"

    def regroup_chains(self):

        chain_indexes = self.get_atoms_in_chains()

        reordered_atoms = []
        for chain_id in chain_indexes.keys():
            for atom in self.atomList[chain_indexes[chain_id]]:
                reordered_atoms.append(atom)

        self.atomList = np.array(reordered_atoms)

[docs]    def get_atoms_in_chains(self):
        """
        TODO - reformat for BioPy_Structure
        """
        chains = collections.OrderedDict()

        for atom_no, atom in enumerate(self.atomList):
            try:
                chains[atom.chain].append(atom_no)
            except KeyError:
                chains[atom.chain] = [atom_no]

        return chains

[docs]    def iter_asym_id(self, asym_id):
        """
        Function to increase the chain "numbering" by one e.g. from D -> E or
        from HA -> IA.
        Uses the chr() and ord() functions to convert between int and ASCII str
        One should run rename_chains before to ensure all chain_ids are letters

        Input: capital letter from A -> Z
        Output: Next letter in alphabet e.g. from A -> B. For input Z, ouput is
                AA, for input ZZ output is AAA etc..
        """
        index = 0
        new_id = [i for i in asym_id]
        chain_int = ord(asym_id[0])

        if chain_int == 90:
            new_id[0] = "A"
            while True:
                index += 1
                try:
                    chain_int = ord(asym_id[index])
                except IndexError:
                    new_id.append("A")
                    break
                if chain_int == 90:
                    new_id[index] = "A"
                    continue
                else:
                    new_id[index] = chr(chain_int + 1)
                    break
        else:
            new_id[0] = chr(chain_int + 1)

        return "".join(new_id)

[docs]    def get_next_chain_id(self, chain_id, chain_list=False):
        """
        Renames chains based on mmCIF convention i.e. A -> B, Z -> 0 or g -> h
        See the ALPHABET list for standard sequence, however any string can be
        used for chain labelling.

        For structures with more than 62 chains, the labels are increased in
        length, i.e. z -> AA, zz -> AAA, AAA -> BBB

        Input: chain_id -> old chain id
        Output: new_chain_id
        """

        if not chain_list:
            chain_list = ALPHABET

        try:
            index = int(chain_list.index(chain_id[0]) + 1) + (
                (len(chain_id) - 1) * len(chain_list)
            )
        except ValueError:
            index = 0

        try:
            new_chain_label = chain_list[index]
        except IndexError:
            n = 1
            while index >= len(chain_list):
                n += 1
                index = index - len(chain_list)

            new_chain_label = chain_list[index] * n

        return new_chain_label

[docs]    def rename_chains(self, chain_list=False):
        """
        TODO - reformat for BioPy_Structure
        """
        if not chain_list:
            chain_list = ALPHABET
        else:
            noc = self.no_of_chains()
            if len(chain_list) != self.no_of_chains():
                print("No. of chains in structure = " + str(noc))
                print("Length of chain list = " + str(len(chain_list)))
                print("Chains not changed.")
                return

        ch = self.atomList[0].chain
        renum = 0
        for atom in self.atomList:
            if atom.chain == ch:
                atom.chain = chain_list[renum]
            else:
                renum += 1
                ch = atom.chain[:]
                atom.chain = chain_list[renum]

[docs]    def split_into_chains(self):
        """
        Split the structure into separate chains and returns the list of
        Structure instance for each chain.
        """
        structList = []
        currentChain = self.atomList[0].chain
        currentStruct = []
        for x in self.atomList:
            if x.chain == currentChain:
                currentStruct.append(x.copy())
            else:
                currentChain = x.chain
                structList.append(BioPy_Structure(currentStruct))
                currentStruct = [x.copy()]
        structList.append(BioPy_Structure(currentStruct))
        return structList

[docs]    def no_of_chains(self):
        """
        Return:
            the number of chains in the Structure object
        """
        a = self.split_into_chains()
        return len(a)

[docs]    def reset_position(self):
        """
        Translate structure back into initial position.
        """
        for x in self.atomList:
            x.reset_position()
        self.CoM = self.initCoM.copy()

[docs]    def change_init_position(self):
        """
        Change initial position of structure to current position.
        """
        for atom in self.atomList:
            atom.change_init_position()
        self.init_CoM = self.CoM.copy()

[docs]    def RMSD_from_init_position(self, CA=False):
        """
        Return RMSD of structure from initial position after translation.

        Arguments:
            *CA*
                True will consider only CA atoms.
                False will consider all atoms.
        Return:
            RMSD in angstrom
        """
        dists = []
        for x in self.atomList:
            if CA:
                if x.atom_name == "CA":
                    dists.append(x.distance_from_init_position())
            else:
                dists.append(x.distance_from_init_position())
        dists = np.array(dists)
        return dists.mean()

[docs]    def RMSD_from_same_structure(self, otherStruct, CA=False, write=True):
        """
        Return the RMSD between two structure instances.

        Arguments:
            *otherStruct*
                Structure instance to compare, containing the same number of
                atoms as the target instance.
            *CA*
                True will consider only CA atoms.
                False will consider all atoms.
        Return:
            RMSD in angstrom
        """
        dists = []
        for a in range(len(self.atomList)):
            if CA:
                if self.atomList[a].atom_name == "CA":
                    if otherStruct.atomList[a].atom_name == "CA":
                        dists.append(
                            self.atomList[a].distance_from_atom(otherStruct.atomList[a])
                        )
                        print(
                            self.atomList[a].atom_name,
                            otherStruct.atomList[a].atom_name,
                            self.atomList[a].res_no,
                            otherStruct.atomList[a].res_no,
                        )
                    else:
                        pass
            else:
                dists.append(
                    self.atomList[a].distance_from_atom(otherStruct.atomList[a])
                )
        dists = np.array(dists)
        return dists.mean()

[docs]    def get_vector_list(self):
        """
        Return:
            Array containing 3D Vector instances of positions of all atoms.
        """
        v = []
        for atom in self.atomList:
            v.append(atom.get_pos_vector())
        return np.array(v)

[docs]    def get_pos_mass_list(self):
        """
        Return:
            Array containing Vector instances of positions of all atoms and
            mass.
        """
        v = []
        for atom in self.atomList:
            v.append(atom.get_pos_mass())
        return np.array(v)

[docs]    def get_extreme_values(self):
        """
        Return:
            A 6-tuple containing the minimum and maximum of x, y and z
            co-ordinates of the structure.
            Given in order (min_x, max_x, min_y, max_y, min_z, max_z).
        """
        min_x = self.atomList[0].get_x()
        max_x = self.atomList[0].get_x()
        min_y = self.atomList[0].get_y()
        max_y = self.atomList[0].get_y()
        min_z = self.atomList[0].get_z()
        max_z = self.atomList[0].get_z()
        for atom in self.atomList[1:]:
            if atom.get_x() < min_x:
                min_x = atom.get_x()
            if atom.get_x() > max_x:
                max_x = atom.get_x()
            if atom.get_y() < min_y:
                min_y = atom.get_y()
            if atom.get_y() > max_y:
                max_y = atom.get_y()
            if atom.get_z() < min_z:
                min_z = atom.get_z()
            if atom.get_z() > max_z:
                max_z = atom.get_z()
        return min_x, max_x, min_y, max_y, min_z, max_z

[docs]    def get_atom_list(self):
        """
        Return:
            An array containing Atom instances of positions of all atoms as:
            [(RES 1 A: x,y,z), ... ,(RES2 1 A: x1,y1,z1)].
        """
        alist = []
        for x in self.atomList:
            alist.append(x.copy())
        return alist

[docs]    def find_same_atom(self, atom_index, otherStruct):
        """
        Find if an atom exists in the compared structure, based on atom index.

        Arguments:
            *atom_index*
                atom number
            *otherStruct*
                a structure object to compare

        Return:
            If a match is found, it returns the atom object; else it returns a
            string reporting the mismatch.
        """
        atom = self.atomList[atom_index]
        for x in otherStruct.atomList:
            if (
                x.res_no == atom.res_no
                and x.atom_name == atom.atom_name
                and atom.res == atom.res
                and x.chain == atom.chain
            ):
                return x
        return "No match of atom index %s in structure %s" % (
            atom_index,
            otherStruct,
        )

[docs]    def get_chain_list(self):
        """
        Return:
            A list of chain ID.
        """
        chain_list = []
        for x in self.atomList:
            if x.chain not in chain_list:
                chain_list.append(x.chain)
        return chain_list

[docs]    def get_chain(self, chainID):
        """
        Return:
            New Structure instance with only the requested chain.
        """
        newAtomList = []
        for x in self.atomList:
            if x.chain == chainID:
                newAtomList.append(x.copy())
        if len(newAtomList) != 0:
            return BioPy_Structure(newAtomList)
        else:
            print("Warning no chain %s found" % chainID)

    def get_dict_str(self):
        dict_ch = {}
        list_res = []
        currentChain = self.atomList[0].chain
        for x in self.atomList:
            if not x.chain == currentChain:
                dict_ch[currentChain] = list_res[:]
                currentChain = x.chain
                list_res = []
            cur_chain = x.chain
            list_res.append(x.get_res_no())
        if cur_chain not in dict_ch:
            dict_ch[cur_chain] = list_res[:]
        return dict_ch

[docs]    def get_selection(self, startRes, finishRes, chain=""):
        """
        Get a new Structure instance for the selected residues range without
        considering residues chain.

        Arguments:
            *startRes*
                Start residue number
            *finishRes*
                End residue number

        Return:
            New Structure instance
        """
        newAtomList = []
        for x in self.atomList:
            cur_chain = x.chain
            if chain:
                if not cur_chain == chain:
                    continue
            if x.get_res_no() >= int(startRes) and x.get_res_no() <= int(finishRes):
                newAtomList.append(x.copy())
        return BioPy_Structure(newAtomList)

[docs]    def break_into_segments(self, rigid_list, chain=""):
        """
        Return a list of Structure instance based on the rigid body list.

        Arguments:
            *rigid list*
                list of rigid body defined as:
                    [[riA,rfA],..,[riB,rfB]]

                where:
                    riA is the starting residues number of segment A.
                    rfA is the final residues number of segment A.
        Return:
            List of TEMPy Structure instance
        """
        structList = []
        for r in rigid_list:
            fstStruct = self.get_selection(r[0], r[1], chain)
            nxtStructs = []
            for x in range(2, len(r), 2):
                nxtStructs.append(self.get_selection(r[x], r[x + 1], chain))
            if len(nxtStructs) != 0:
                structList.append(fstStruct.combine_structures(nxtStructs))
            else:
                structList.append(fstStruct)
        if len(structList) == 0:
            print("Error: Residues not in PDB.")
            sys.exit()
        else:
            return structList

    def get_chain_ca(self, chainID):
        newAtomList = []
        for x in self.atomList:
            if x.chain == chainID and x.atom_name == "CA":
                newAtomList.append(x.copy())
        if len(newAtomList) != 0:
            return BioPy_Structure(newAtomList)
        else:
            print('Warning no chain %s found"%chainID')

    def get_rgyration(self):
        vc = self.calculate_centre_of_mass()
        num = 0.0
        den = 0.0
        for x in self.atomList:
            vx = x.get_pos_vector()
            dist = vx.dist(vc)
            num = num + x.get_mass() * (dist * dist)
            den = den + x.get_mass()
        rg = np.sqrt(num / den)
        return rg

    def get_atom_zyx_map_indexes(self, map):

        zyx_coordinates = []
        for atom in self.atomList:
            x = int(round((atom.x - map.origin[0]) / map.apix[0], 0))
            y = int(round((atom.y - map.origin[1]) / map.apix[1], 0))
            z = int(round((atom.z - map.origin[2]) / map.apix[2], 0))
            zyx_coordinates.append([z, y, x])

        return np.array(zyx_coordinates)

[docs]    def calculate_centroid(self):
        """
        Return centre of mass of structure as a Vector instance.
        """
        x_Total = 0.0
        y_Total = 0.0
        z_Total = 0.0
        natom = 0.0
        for atom in self.atomList:
            x = atom.get_x()
            y = atom.get_y()
            z = atom.get_z()
            x_Total += x
            y_Total += y
            z_Total += z
            natom += 1
            x_CoM = x_Total / natom
            y_CoM = y_Total / natom
            z_CoM = z_Total / natom
        return Vector.Vector(x_CoM, y_CoM, z_CoM)

[docs]    def combine_structures(self, structList):
        """
        Add a list of Structure instance to the existing structure.

        Arguments:
            *structList*
                list of Structure instance
        Return:
            New Structure Instance
        """
        atomList = self.atomList.copy()
        for s in structList:
            atomList = np.append(atomList, s.atomList)
        return BioPy_Structure(atomList)

[docs]    def combine_SSE_structures(self, structList):
        """
        Combine a list of Structure instance into one and return a new
        Structure instance.

        Arguments:
            *structList*
                list of Structure instance
        Return:
            New Structure Instance
        """
        atomList = []
        for s in structList:
            atomList = np.append(atomList, s.atomList)
        return BioPy_Structure(atomList)

[docs]    def get_selection_more_than(self, startRes):
        """
        Get a Structure instance comprising all residues with their residue
        numbers greater than startRes.

        Arguments:
            *startRes*
                a residue number

        Return:
            A Structure instance
        """
        newAtomList = []
        for x in self.atomList:
            if x.get_res_no() >= int(startRes):
                newAtomList.append(x.copy())
        return BioPy_Structure(newAtomList)

[docs]    def get_selection_less_than(self, endRes):
        """
        Get a Structure instance comprising all residues with their residue
        numbers less than endRes.

        Arguments:
            *endRes*
                a residue number

        Return:
            A Structure instance
        """
        newAtomList = []
        for x in self.atomList:
            if x.get_res_no() <= endRes:
                newAtomList.append(x.copy())
        return BioPy_Structure(newAtomList)

[docs]    def get_residue(self, resNo):
        """
        Get the residue corresponding to the residue number.

        Arguments:
            *resNo*
                Residues number
        Return:
            Returns a Residues instance.
        """
        return BioPy_Structure(
            [x.copy() for x in self.atomList if x.get_res_no() == int(resNo)]
        )

[docs]    def get_atom(self, index):
        """
        Return specific atom in Structure instance.

        Arguments:
            *index*
                Index of the atom
        Return:
            Returns an Atom instance.
        """
        return self.atomList[int(index)]

[docs]    def get_backbone(self):
        """
        Return:
            Structure instance with only the backbone atoms in structure.
        """
        backboneList = []
        for atom in self.atomList:
            if (
                atom.get_name() == "CA"
                or atom.get_name() == "N"
                or atom.get_name() == "C"
            ):
                backboneList.append(atom.copy())
        return BioPy_Structure(backboneList[:])

[docs]    def get_CAonly(self):
        """
        Return:
            Structure instance with only the backbone atoms in structure.
        """
        backboneList = []
        for atom in self.atomList:
            if atom.get_name() == "CA":
                backboneList.append(atom.copy())
            else:
                pass
        return BioPy_Structure(backboneList)

    def vectorise(self):
        vectorList = []
        vList = []
        for x in self.atomList:
            vectorList.append(x.get_pos_vector())
        for y in range(len(vectorList) - 1):
            vList.append(vectorList[y] - (vectorList[y + 1]))
        return vList

[docs]    def get_torsion_angles(self):
        """
        Return:
            List of torsion angles in Structure instance.
        """
        vectorList = self.vectorise()
        angles = []
        for v in range(len(vectorList) - 2):
            angles.append(
                Vector.altTorsion(
                    vectorList[v], vectorList[v + 1].reverse(), vectorList[v + 2]
                )
            )
        return angles

[docs]    def get_prot_mass_from_res(self, Termini=False):
        """
        # ADD by IF 22-4-2013
        #from Harpal  code calculation of mass from seq.
        Calculates Mass (kDa) of the Structure instance, from average mass
        #based on http://web.expasy.org/findmod/findmod_masses.html
        """
        aa = {
            "ARG": "R",
            "HIS": "H",
            "LYS": "K",
            "ASP": "D",
            "GLU": "E",
            "SER": "S",
            "THR": "T",
            "ASN": "N",
            "GLN": "Q",
            "CYS": "C",
            "SEC": "U",
            "GLY": "G",
            "PRO": "P",
            "ALA": "A",
            "ILE": "I",
            "LEU": "L",
            "MET": "M",
            "PHE": "F",
            "TRP": "W",
            "TYR": "Y",
            "VAL": "V",
        }
        mass_tot = 0
        str = self.copy()
        seq_string = ""
        for chain in str.split_into_chains():
            seq_list_resno = []
            for x in chain.atomList:
                if x.res in aa.keys():
                    if x.res_no not in seq_list_resno:
                        seq_list_resno.append(x.res_no)
                        if x.res not in aa.keys():
                            seq_string += "x"
                        res_singleletter = aa[x.res]
                        seq_string += "%s" % res_singleletter
                        mass_tot += AA_MASS[res_singleletter]
                else:
                    pass
        if Termini:
            mass_tot += 17.992
        return float(mass_tot / 1000)

[docs]    def get_prot_mass_from_atoms(self):
        """
        Calculates Mass (kDa) of the Structure instance, from average mass.
        Atoms based use get_prot_mass_from_res is more accurate.
        """
        mass_tot = 0
        for x in self.atomList:
            mass_tot += x.get_mass()
        return float(mass_tot / 1000)


[docs]class gemmi_Structure(BioPy_Structure):
    """
    Class for representing a protein structure.

    Extends BioPy_Structure to deal with additional mmCif labels for dividing
    atoms into entities, asym units and chains.


    """

    def __init__(
        self,
        atomList,
        gemmi_structure,
        filename="Unknown",
        header="",
        tempy_scores=[],
    ):
        """
        Initialise using a string of the relevant pdb file name or a numpy
        array of Atom objects.

        Arguments:
            *atomList*
                String of pdb file name or array of Atom objects
            *gemmi_structure*
                An instance of a gemmi structure object generated from an mmcif
                or pdb file
        """
        super().__init__(atomList)

        self.header = header
        self.pdb_id = gemmi_structure.name
        self.filename = filename

        self.chain_indexes = dict()
        self.asym_id_indexes = dict()
        self.entity_id_indexes = dict()
        self.set_chains_asymids_entities()

        self.local_scores = collections.OrderedDict()
        self.local_score_labels = {}
        self.global_scores = collections.OrderedDict()
        self.read_tempy_scores(tempy_scores)

    def __getitem__(self, index):
        return self.atomList[index]

    def __len__(self):
        return len(self.atomList)

    def __repr__(self):
        s = "Gemmi_Structure: %s \n" % (self.pdb_id)
        if not self.filename == "Unknown":
            s += ", Filename: " + self.filename + "\n"
        else:
            pass
        s += "No Of Atoms: " + str(len(self)) + "\n"
        s += "First Atom: " + str(self.atomList[0]) + "\n"
        s += "Last Atom: " + str(self.atomList[-1])
        return s

    def get_pdb_id(self):
        return self.pdb_id

[docs]    def copy(self):
        return copy.deepcopy(self)

    def get_chain_indexes(self):
        chain_indexes = collections.OrderedDict()

        for n, atom in enumerate(self.atomList):
            try:
                chain_indexes[atom.chain].append(n)
            except KeyError:
                chain_indexes[atom.chain] = [n]

        self.chain_indexes = chain_indexes

    def get_asym_indexes(self):
        asym_indexes = collections.OrderedDict()

        for n, atom in enumerate(self.atomList):
            try:
                asym_indexes[atom.asym_id].append(n)
            except KeyError:
                asym_indexes[atom.asym_id] = [n]

        self.asym_id_indexes = asym_indexes

    def get_entity_indexes(self):
        entity_indexes = collections.OrderedDict()

        for n, atom in enumerate(self.atomList):
            try:
                entity_indexes[atom.model].append(n)
            except KeyError:
                entity_indexes[atom.model] = [n]

        self.entity_id_indexes = entity_indexes

    def set_chains_asymids_entities(self):
        chains = {}
        asym_ids = {}
        entities = {}

        for ind, atom in enumerate(self.atomList):
            try:
                chains[atom.chain].append(ind)
            except KeyError:
                chains[atom.chain] = [ind]

            try:
                asym_ids[atom.asym_id].append(ind)
            except KeyError:
                asym_ids[atom.asym_id] = [ind]

            try:
                entities[atom.model].append(ind)
            except KeyError:
                entities[atom.model] = [ind]

        self.chain_indexes = chains
        self.asym_id_indexes = asym_ids
        self.entity_id_indexes = entities

    def get_asym_id_atoms(self, asym_id):

        return self.atomList[self.asym_id_indexes[asym_id]]

    def get_chain_atoms(self, chain_id):

        return self.atomList[self.chain_indexes[chain_id]]

    def get_entity_atoms(self, entity_id):

        return self.atomList[self.entity_id_indexes[entity_id]]

    def get_next_entity_id(self, entity_id):

        try:
            old_entity_id = int(entity_id)
        except ValueError:
            old_entity_id = 0

        return str(old_entity_id + 1)

    def reorder_atoms(self):
        new_atom_list = []
        serial = 1

        for chain, indexes in self.chain_indexes.items():
            atoms_from_chain = self.atomList[indexes]
            for atom in atoms_from_chain:
                atom.serial = serial
                new_atom_list.append(atom)
                serial += 1

        self.atomList = np.array(new_atom_list)
        self.set_chains_asymids_entities()

[docs]    def set_atom_labels(self):
        """
        Sets chain, asym_id and entity_id labels for the atoms in atomList

        Important to run this before file writing
        """

        for chain_label, indexes in self.chain_indexes.items():
            for atom in self.atomList[indexes]:
                atom.chain = chain_label

        for asym_label, indexes in self.asym_id_indexes.items():
            for atom in self.atomList[indexes]:
                atom.asym_id = asym_label

        for entity_id, indexes in self.entity_id_indexes.items():
            for atom in self.atomList[indexes]:
                atom.model = str(entity_id)

[docs]    def split_into_chains(self):
        """
        Overrides BioPy_Structure method

        Split the structure into separate chains and returns the list of
        Structure instance for each chain.
        """
        new_structure = self.copy()
        structList = []
        for chain_name in self.chain_indexes.keys():
            structList.append(new_structure.get_chain(chain_name))

        return structList

[docs]    def get_chain(self, chainID):
        """
        Return:
            New Structure instance with only the requested chain.
        """
        new_atom_list = self.atomList[self.chain_indexes[chainID]]

        struct = gemmi.Structure()
        struct.name = self.pdb_id + "chain%s" % (chainID)

        return gemmi_Structure(new_atom_list, struct)

[docs]    def add_structure_instance(
        self,
        new_structure,
        integrate=False,
    ):
        """Add a seperate structure instance to self

        **Arguements:**
            *new_structure*
                the new structure instance which will be incorpoated into self
        **Optional Arguments:**
            *integrate*
                If True, atoms from the new_structure will be incorporated into
                chains with the same name in self.

        """

        target_struct = new_structure.copy()

        # set up stuff
        if not self.chain_indexes:
            self.set_chains_asymids_entities()
        available_chain_labels = [char for char in ALPHABET]

        for key in self.chain_indexes.keys():
            try:
                available_chain_labels.pop(available_chain_labels.index(key))
            except ValueError:  # occurs if chains have label not in ALPHABET
                continue

        # get lists of current labels
        existing_chains = self.chain_indexes.keys()
        existing_asym = self.asym_id_indexes.keys()
        existing_entities = self.entity_id_indexes.keys()

        # prep new labels in case they are needed
        new_chain_id = self.get_next_chain_id(
            self.atomList[-1].chain,
            chain_list=available_chain_labels,
        )
        new_asym_id = self.iter_asym_id(self.atomList[-1].asym_id)
        while new_asym_id in existing_asym:
            new_asym_id = self.iter_asym_id(new_asym_id)
        new_entity_id = self.get_next_entity_id(self.atomList[-1].model)
        while new_entity_id in existing_entities:
            new_entity_id = self.get_next_entity_id(new_entity_id)

        # get lists of current labels
        existing_chains = list(self.chain_indexes.keys())
        existing_asym = list(self.asym_id_indexes.keys())
        existing_entities = list(self.entity_id_indexes.keys())

        if not integrate:
            for new_chain, indexes in target_struct.chain_indexes.items():
                if new_chain in existing_chains:
                    for ind in indexes:
                        target_struct[ind].chain = new_chain_id
                    new_chain_id = self.get_next_chain_id(
                        new_chain_id,
                        chain_list=available_chain_labels,
                    )
                if target_struct[indexes[0]].asym_id in existing_asym:
                    existing_asym.append(new_asym_id)
                    for ind in indexes:
                        target_struct[ind].asym_id = new_asym_id
                    while new_asym_id in existing_asym:
                        new_asym_id = self.iter_asym_id(new_asym_id)

                if target_struct[indexes[0]].model in existing_entities:
                    existing_entities.append(new_entity_id)
                    for ind in indexes:
                        target_struct[ind].model = new_entity_id
                    while new_entity_id in existing_entities:
                        new_entity_id = self.get_next_entity_id(new_entity_id)

                new_atoms = target_struct.atomList[indexes]
                self.atomList = np.append(self.atomList, new_atoms)
            self.set_chains_asymids_entities()
        else:
            target_struct.set_chains_asymids_entities()
            for chain, indexes in target_struct.chain_indexes.items():
                new_atoms = target_struct.atomList[indexes]
                self.atomList = np.append(self.atomList, new_atoms)

            self.set_chains_asymids_entities()
            self.reorder_atoms()

[docs]    def add_structure(
        self,
        filename,
        integrate=False,
        hetatm=False,
        water=False,
    ):
        """
        Add structure from file
        """

        import TEMPy.protein.structure_parser as parser

        if filename.endswith(".cif"):
            new_structure = parser.mmCIFParser.read_mmCIF_file(
                filename,
                hetatm=hetatm,
                water=water,
            )
        elif filename.endswith(".pdb"):
            new_structure = parser.PDBParser.read_PDB_file(
                filename, hetatm=hetatm, water=water
            )
        else:
            raise TypeError("Unrecognised file type for file merging")
        self.add_structure_instance(
            new_structure,
            integrate=integrate,
        )
        del new_structure

[docs]    def combine_structures(self, structList):
        """Combine a list of structure instances with self and returns the new
        structure as a seperate instance.

        Note: this function does not mutate self.

        **Arguments:**
            *structList*
                List of Structure instances
        **Returns:**
            *combined_struct*
                New gemmi_Structure instance
        """
        combined_struct = self.copy()
        for s in structList:
            combined_struct.add_structure_instance(s)
        return combined_struct

[docs]    def write_to_mmcif(
        self,
        filename,
        hetatom=False,
        save_all_metadata=False,
    ):
        """Write Structure instance to new mmCif file.

        Arguments:
            *filename*
                output filename.
        """
        cif_instance = mmcif_writer(
            self,
            filename,
            save_all_metadata=save_all_metadata,
        )

        if filename[-4:] == ".cif":
            pass
        else:
            filename += ".cif"

        self.header = mmcif_writer.clean_header(cif_instance.header)

        f = open(filename, "w")
        f.write("data_" + self.get_pdb_id() + "\n#\n")

        # add header info into gemmi block
        if self.header:
            header_tags = self.header["tags"]
            for tag in header_tags:

                if mmcif_writer.is_pair(self.header[tag]):
                    for line in mmcif_writer.get_pairs_str_list(tag, self.header[tag]):
                        f.write(line + "\n")
                else:
                    for line in mmcif_writer.get_table_str_list(tag, self.header[tag]):
                        f.write(line + "\n")
                f.write("#\n")

        for line in mmcif_writer.get_atoms_str_list(self.atomList):
            f.write(line + "\n")
        f.write("#\n")

        if self.local_scores:
            mmcif_writer.write_tempy_localscores(
                f, self.local_scores, self.local_score_labels
            )
            f.write("#\n")

        if self.global_scores:
            global_scores = mmcif_writer.get_global_score_pairs(self.global_scores)

            for line in global_scores:
                f.write(line + "\n")
            f.write("#\n")

        f.close()
        print("Protein structure saved as %s" % (filename))

    def get_model_atoms(self, model_ID):
        """ """
        return self.atomList[self.entity_id_indexes[model_ID]]

[docs]    def rename_asym_labels(self, label_list=False, starting_val=False):
        """Renames asym_id for each atom in alphabetical order

        For structures with more than 26 models the ids switch to doulbe
        lettering (i.e. from Z -> AA and from ZZZ -> AAAA)
        """
        if not starting_val:
            if label_list:
                new_asym_id = label_list[0]
            else:
                new_asym_id = "A"
        else:
            new_asym_id = starting_val
        new_asym_id_indexes = collections.OrderedDict()

        for old_asym_id, indexes in self.asym_id_indexes.items():
            new_asym_id_indexes[new_asym_id] = indexes
            for atom in self.atomList[indexes]:
                atom.asym_id = new_asym_id
            if label_list:
                new_asym_id = self.get_next_chain_id(new_asym_id, label_list)
            else:
                new_asym_id = self.iter_asym_id(new_asym_id)

        self.asym_id_indexes = new_asym_id_indexes

[docs]    def rename_entity_labels(self, starting_val=1):
        """
        Renames asym_id for each atom in alphabetical order - for structures
        with more than 26 models the ids switch to doulbe lettering (i.e. from
        Z -> AA and from ZZZ -> AAAA)
        """
        new_entity_id = int(starting_val)

        new_entity_id_indexes = collections.OrderedDict()

        for old_entity_id, indexes in self.entity_id_indexes.items():
            new_entity_id_indexes[str(new_entity_id)] = indexes
            for atom in self.atomList[indexes]:
                atom.model = str(new_entity_id)
            new_entity_id += 1

        self.entity_id_indexes = new_entity_id_indexes

[docs]    def rename_chains(self, chain_list=False, starting_val=False):
        """Rename chain labels based on the list of new chain names

        **Optional Arguments:**
             *chain_list*
                 List of chain names
                 If False rename in alphabetical order.
        """
        if not chain_list:
            chain_list = ALPHABET
        else:
            if len(chain_list) == 0:
                print("Length of chain list = " + str(len(chain_list)))
                print("Chains not changed.")
                return

        if starting_val:
            new_chain = starting_val
        else:
            new_chain = chain_list[0]
        new_chains_dict = collections.OrderedDict()

        for old_chain_label, indexes in self.chain_indexes.items():
            new_chains_dict[new_chain] = indexes
            for atom in self.atomList[indexes]:
                atom.chain = new_chain
            new_chain = self.get_next_chain_id(
                new_chain,
                chain_list=chain_list,
            )
        self.chain_indexes = new_chains_dict

[docs]    def no_of_chains(self):
        """
        Return:
            the number of chains in the Structure object
        """
        return len(self.chain_indexes.keys())

[docs]    def set_local_score(self, score_name, scores, verbose=False):
        """
        Function to incorporate local scores into structure instance, so that
        these values can be written into new cif files

        This would be efficiently handled using a pandas library
        """
        if not self.local_scores:
            self.local_scores = collections.OrderedDict()
            for chain in scores:
                for res_no in scores[chain]:
                    self.local_scores[(chain, res_no)] = [scores[chain][res_no]]
                    self.local_score_labels[score_name] = 0
        else:
            # for updating existing entries
            try:
                self.local_score_labels[score_name]
                if verbose:
                    print("Updating values for %s score" % (score_name))
                for chain in scores:
                    for res_no in scores[chain]:
                        self.local_scores[(chain, res_no)] = [scores[chain][res_no]]
            # if score entry doesn't exist make a new one
            except KeyError:
                for chain in scores:
                    for res_no in scores[chain]:
                        self.local_scores[(chain, res_no)].append(scores[chain][res_no])
                        self.local_score_labels[score_name] = (
                            len(self.local_scores[(chain, res_no)]) + 1
                        )

    def get_aa_local_score(self, score_name, chain, aa_no):
        index = self.local_score_labels[score_name]
        return self.local_scores[(chain, aa_no)][index]

[docs]    def set_global_score(self, score_name, score):
        """
        Function to incorporate global scores into the structure instance so
        these values can be written into the cif file as label-value pairs
        """

        self.global_scores[score_name] = score

[docs]    def get_global_score(self, score_name):
        """
        return global score
        """

        try:
            return self.global_scores[score_name]
        except KeyError:
            raise KeyError("The global score %s does not exist" % (score_name))

    def read_tempy_scores(self, tempy_scores):

        for tag in tempy_scores:
            if tag.endswith("local_score."):

                scoring_block = tempy_scores[tag]
                scoring_values = [scoring_block[key] for key in scoring_block]

                for n, label in enumerate(scoring_block.keys()):
                    if n < 2:
                        continue
                    self.local_score_labels[label] = n - 2

                for i in range(len(scoring_values[0])):
                    chain = scoring_values[0][i]
                    aa_no = int(scoring_values[1][i])
                    for n in range(2, len(scoring_block.keys())):
                        score_value = float(scoring_values[n][i])
                        try:
                            self.local_scores[chain, aa_no].append(score_value)
                        except KeyError:
                            self.local_scores[chain, aa_no] = [score_value]

            elif tag.endswith("global_score."):
                for n, label in enumerate(tempy_scores[tag].keys()):
                    self.global_score[label] = tempy_scores[tag]