Source code for xrayutilities.materials.cif

# This file is part of xrayutilities.
#
# xrayutilities is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# Copyright (c) 2010-2021, 2023 Dominik Kriegner <dominik.kriegner@gmail.com>

import copy
import io
import itertools
import operator
import os
import re
import shlex

import numpy

from .. import config
from . import elements
from . import spacegrouplattice as sgl
from . import wyckpos

re_data = re.compile(r"^data_", re.IGNORECASE)
re_loop = re.compile(r"^loop_", re.IGNORECASE)
re_symop = re.compile(
    r"^\s*("
    "_space_group_symop_operation_xyz|"
    "_symmetry_equiv_pos_as_xyz)",
    re.IGNORECASE,
)
re_name = re.compile(r"^\s*_chemical_formula_sum", re.IGNORECASE)
re_atom = re.compile(r"^\s*_atom_site_label\s*$", re.IGNORECASE)
re_atomtyp = re.compile(r"^\s*_atom_site_type_symbol\s*$", re.IGNORECASE)
re_atomx = re.compile(r"^\s*_atom_site_fract_x", re.IGNORECASE)
re_atomy = re.compile(r"^\s*_atom_site_fract_y", re.IGNORECASE)
re_atomz = re.compile(r"^\s*_atom_site_fract_z", re.IGNORECASE)
re_uiso = re.compile(r"^\s*_atom_site_U_iso_or_equiv", re.IGNORECASE)
re_biso = re.compile(r"^\s*_atom_site_B_iso_or_equiv", re.IGNORECASE)
re_atomocc = re.compile(r"^\s*_atom_site_occupancy", re.IGNORECASE)
re_labelline = re.compile(r"^\s*_")
re_emptyline = re.compile(r"^\s*$")
re_quote = re.compile(r"'")
re_spacegroupnr = re.compile(
    r"^\s*(_space_group_IT_number|"
    "_symmetry_Int_Tables_number)",
    re.IGNORECASE,
)
re_spacegroupname = re.compile(
    r"^\s*(_symmetry_space_group_name_H-M|"
    "_space_group_name_H-M_alt)",
    re.IGNORECASE,
)
re_spacegroupsetting = re.compile(r"^\s*_symmetry_cell_setting", re.IGNORECASE)
re_cell_a = re.compile(r"^\s*_cell_length_a", re.IGNORECASE)
re_cell_b = re.compile(r"^\s*_cell_length_b", re.IGNORECASE)
re_cell_c = re.compile(r"^\s*_cell_length_c", re.IGNORECASE)
re_cell_alpha = re.compile(r"^\s*_cell_angle_alpha", re.IGNORECASE)
re_cell_beta = re.compile(r"^\s*_cell_angle_beta", re.IGNORECASE)
re_cell_gamma = re.compile(r"^\s*_cell_angle_gamma", re.IGNORECASE)
re_comment = re.compile(r"^\s*#")



[docs]
class CIFFile:
    """
    class for parsing CIF (Crystallographic Information File) files. The class
    aims to provide an additional way of creating material classes instead of
    manual entering of the information the lattice constants and unit cell
    structure are parsed from the CIF file.

    If multiple datasets are present in the CIF file this class will attempt to
    parse all of them into the the data dictionary. By default all methods
    access the first data set found in the file.
    """


[docs]
    def __init__(self, filestr, digits=4):
        """
        initialization of the CIFFile class

        Parameters
        ----------
        filestr :  str, bytes
            CIF filename or string representation of the CIF file
        digits :    int, optional
            number of digits to check if position is unique
        """
        self.digits = digits

        if os.path.isfile(filestr):
            self.filename = filestr
            try:
                fid = open(self.filename, "rb")
            except OSError as exc:
                raise IOError(f"cannot open CIF file {self.filename}") from exc
        else:
            if filestr.count("\n") == 0:
                print(
                    'XU.materials.CIFFile: "filestr" contains only one line '
                    "but a file with that name does not exist! Continuing "
                    "with the assumption this one line string is the "
                    "content of a CIF file!"
                )
            self.filename = "__from_str__"
            if isinstance(filestr, bytes):
                fid = io.BytesIO(filestr)
            else:
                fid = io.BytesIO(bytes(filestr.encode("ascii")))

        if config.VERBOSITY >= config.INFO_ALL:
            print(f"XU.materials: parsing CIF file {self.filename}")
        self.default_dataset = None
        self.data = {}
        self.Parse(fid)
        fid.close()



[docs]
    def Parse(self, fid):
        """
        function to parse a CIF file. The function reads all the included data
        sets and adds them to the data dictionary.

        """
        fidpos = fid.tell()
        while True:
            line = fid.readline()
            if not line:
                break
            fidpos = fid.tell()
            line = line.decode("ascii", "ignore")
            m = re_data.match(line)
            if m:
                fid.seek(fidpos)
                name = line[m.end() :].strip()
                self.data[name] = CIFDataset(fid, name, self.digits)
                if self.data[name].has_atoms and not self.default_dataset:
                    self.default_dataset = name



[docs]
    def SGLattice(self, dataset=None, use_p1=False):
        """
        create a SGLattice object with the structure from the CIF dataset

        Parameters
        ----------
        dataset :   str, optional
            name of the dataset to use. if None the default one will be used.
        use_p1 :    bool, optional
            force the use of P1 symmetry, default False
        """
        if not dataset:
            dataset = self.default_dataset
        return self.data[dataset].SGLattice(use_p1=use_p1)


    def __str__(self):
        """
        returns a string with positions and names of the atoms for all datasets
        """
        ostr = ""
        ostr += f"CIF-File: {self.filename}\n"
        for name, dataset in self.data.items():
            ostr += f"\nDataset: {name}"
            if name == self.default_dataset:
                ostr += " (default)"
            ostr += "\n"
            ostr += str(dataset)
        return ostr




[docs]
class CIFDataset:
    """
    class for parsing CIF (Crystallographic Information File) files. The class
    aims to provide an additional way of creating material classes instead of
    manual entering of the information the lattice constants and unit cell
    structure are parsed from the CIF file
    """


[docs]
    def __init__(self, fid, name, digits):
        """
        initialization of the CIFDataset class. This class parses one data
        block.

        Parameters
        ----------
        fid :       filehandle
            file handle set to the beginning of the data block to be parsed
        name :      str
            identifier string of the dataset
        digits :    int
            number of digits to check if position is unique
        """
        self.name = name
        self.digits = digits
        self.has_atoms = False

        if config.VERBOSITY >= config.INFO_ALL:
            print(f"XU.materials: parsing cif dataset {self.name}")
        self.Parse(fid)
        self.SymStruct()



[docs]
    def Parse(self, fid):
        """
        function to parse a CIF data set. The function reads the
        space group symmetry operations and the basic atom positions
        as well as the lattice constants and unit cell angles
        """

        self.symops = []
        self.atoms = []
        self.lattice_const = numpy.zeros(3, dtype=numpy.double)
        self.lattice_angles = numpy.zeros(3, dtype=numpy.double)

        loop_start = False
        symop_loop = False
        atom_loop = False

        def get_element(cifstring):
            el = re.sub(r"['\"]", r"", cifstring)
            if "+" in el or "-" in el:
                # add oxidation number if not present
                for sign in ("+", "-"):
                    if sign in el:
                        if not el[el.index(sign) - 1].isdigit():
                            signidx = el.index(sign)
                            el = el[:signidx] + "1" + el[signidx:]
                # replace special characters
                for r, o in zip(("dot", "p", "m"), (".", "+", "-")):
                    el = el.replace(o, r)
            else:
                el = re.sub(r"([0-9])", r"", el)
            el = re.sub(r"\(\w*\)", r"", el)
            try:
                element = getattr(elements, el)
            except AttributeError:  # el not found, typ. due to oxidation state
                f = re.search("[0-9]", el)
                if not f and el == "?":
                    element = elements.Dummy
                elif f is None:
                    raise ValueError(
                        "XU.materials: element ('%s') could not"
                        " be identified as chemical element. Only"
                        " abbreviations of element names are "
                        "supported." % (cifstring)
                    )
                else:
                    elname = el[: f.start()]
                    if hasattr(elements, elname):
                        # here one might want to find a closer alternative than
                        # the neutral atom, but the effect this has should be
                        # minimal, currently simply the neutral atom is used
                        if config.VERBOSITY >= config.INFO_LOW:
                            print(
                                f"XU.materials: Warning: element {elname} "
                                f"used instead of {cifstring}"
                            )
                        element = getattr(elements, elname)
                    else:
                        raise ValueError(
                            "XU.materials: element ('%s') could "
                            "not be found" % (cifstring)
                        )
            return element

        def floatconv(string):
            """
            helper function to convert string with possible error
            given in brackets to float
            """
            try:
                f = float(re.sub(r"\(.+\)", r"", string))
            except ValueError:
                f = numpy.nan
            return f

        def intconv(string):
            """
            helper function to convert string to integer
            """
            try:
                i = int(string)
            except ValueError:
                i = None
            return i

        fidpos = fid.tell()
        for line in fid.readlines():
            linelen = len(line)
            line = line.decode("ascii", "ignore")
            if config.VERBOSITY >= config.DEBUG:
                print(line)
                print(fid.tell(), fidpos)

            if re_data.match(line):
                fid.seek(fidpos)
                break
            fidpos += linelen

            # ignore comment lines
            if re_comment.match(line):
                continue

            if re_loop.match(line):  # start of loop
                if config.VERBOSITY >= config.DEBUG:
                    print("XU.materials: loop start found")
                loop_start = True
                loop_labels = []
                symop_loop = False
                symop_idx = None
                atom_loop = False
                alab_idx = None
                ax_idx = None
                ay_idx = None
                az_idx = None
                uiso_idx = None
                biso_idx = None
                occ_idx = None
            elif re_labelline.match(line):
                if re_cell_a.match(line):
                    self.lattice_const[0] = floatconv(line.split()[1])
                elif re_cell_b.match(line):
                    self.lattice_const[1] = floatconv(line.split()[1])
                elif re_cell_c.match(line):
                    self.lattice_const[2] = floatconv(line.split()[1])
                elif re_cell_alpha.match(line):
                    self.lattice_angles[0] = floatconv(line.split()[1])
                elif re_cell_beta.match(line):
                    self.lattice_angles[1] = floatconv(line.split()[1])
                elif re_cell_gamma.match(line):
                    self.lattice_angles[2] = floatconv(line.split()[1])
                elif re_spacegroupnr.match(line):
                    i = intconv(line.split()[1])
                    if i:
                        self.sgrp_nr = i
                elif re_spacegroupname.match(line):
                    self.sgrp_name = "".join(line.split()[1:]).strip("'")
                elif re_spacegroupsetting.match(line):
                    i = intconv(line.split()[1])
                    if i:
                        self.sgrp_setting = i
                elif re_name.match(line):
                    try:
                        self.name = shlex.split(line)[1]
                    except IndexError:
                        self.name = None
                if loop_start:
                    loop_labels.append(line.strip())
                    if re_symop.match(line):  # start of symmetry op. loop
                        if config.VERBOSITY >= config.DEBUG:
                            print("XU.materials: symop-loop identified")
                        symop_loop = True
                        symop_idx = len(loop_labels) - 1
                    elif re_atom.match(line) or re_atomtyp.match(line):
                        # start of atom position loop
                        if config.VERBOSITY >= config.DEBUG:
                            print("XU.materials: atom position-loop found")
                        atom_loop = True
                        if re_atomtyp.match(line):
                            alab_idx = len(loop_labels) - 1
                        elif not list(filter(re_atomtyp.match, loop_labels)):
                            # ensure precedence of atom_site_type_symbol
                            alab_idx = len(loop_labels) - 1
                    elif re_atomx.match(line):
                        ax_idx = len(loop_labels) - 1
                        if config.VERBOSITY >= config.DEBUG:
                            print(
                                "XU.materials: atom position x: col%d" % ax_idx
                            )
                    elif re_atomy.match(line):
                        ay_idx = len(loop_labels) - 1
                    elif re_atomz.match(line):
                        az_idx = len(loop_labels) - 1
                    elif re_uiso.match(line):
                        uiso_idx = len(loop_labels) - 1
                    elif re_biso.match(line):
                        biso_idx = len(loop_labels) - 1
                    elif re_atomocc.match(line):
                        occ_idx = len(loop_labels) - 1

            elif re_emptyline.match(line):
                loop_start = False
                symop_loop = False
                atom_loop = False
                continue
            elif symop_loop:  # symmetry operation entry
                loop_start = False
                entry = shlex.split(line)[symop_idx]
                if re_quote.match(entry):
                    opstr = entry
                else:
                    opstr = "'" + entry + "'"
                opstr = re.sub(r"^'", r"(", opstr)
                opstr = re.sub(r"'$", r")", opstr)
                self.symops.append(opstr)
            elif atom_loop:  # atom label and position
                loop_start = False
                asplit = line.split()
                try:
                    atom = get_element(asplit[alab_idx])
                    apos = (
                        floatconv(asplit[ax_idx]),
                        floatconv(asplit[ay_idx]),
                        floatconv(asplit[az_idx]),
                    )
                    occ = floatconv(asplit[occ_idx]) if occ_idx else 1
                    if numpy.isnan(occ):
                        occ = 1
                    uiso = floatconv(asplit[uiso_idx]) if uiso_idx else 0
                    biso = floatconv(asplit[biso_idx]) if biso_idx else 0
                    if numpy.isnan(uiso):
                        uiso = 0
                    if numpy.isnan(biso):
                        biso = 0
                    if biso == 0:
                        biso = 8 * numpy.pi**2 * uiso
                    self.atoms.append((atom, apos, occ, biso))
                except IndexError:
                    if config.VERBOSITY >= config.INFO_LOW:
                        print(
                            'XU.materials: could not parse atom line: "%s"'
                            % line.strip()
                        )
        if self.atoms:
            self.has_atoms = True



[docs]
    def SymStruct(self):
        """
        function to obtain the list of different atom positions in the unit
        cell for the different types of atoms and determine the space group
        number and origin choice if available. The data are obtained from the
        data parsed from the CIF file.
        """

        def rem_white(string):
            return string.replace(" ", "")

        if hasattr(self, "sgrp_name"):
            # determine spacegroup
            cifsgn = rem_white(self.sgrp_name).split(":")[0]
            for nr, name in sgl.sgrp_name.items():
                if cifsgn == rem_white(name):
                    self.sgrp_nr = int(nr)
            if not hasattr(self, "sgrp_nr"):
                # try ignoring the minuses
                for nr, name in sgl.sgrp_name.items():
                    if cifsgn == rem_white(name.replace("-", "")):
                        self.sgrp_nr = int(nr)
            if len(self.sgrp_name.split(":")) > 1:
                self.sgrp_suf = ":" + self.sgrp_name.split(":")[1]
            elif hasattr(self, "sgrp_setting"):
                self.sgrp_suf = ":%d" % self.sgrp_setting

        if not hasattr(self, "sgrp_suf"):
            if hasattr(self, "sgrp_nr"):
                self.sgrp_suf = sgl.get_possible_sgrp_suf(self.sgrp_nr)
            else:
                self.sgrp_suf = ""
        if isinstance(self.sgrp_suf, str):
            suffixes = [
                self.sgrp_suf,
            ]
        else:
            suffixes = copy.copy(self.sgrp_suf)
        for sgrp_suf in suffixes:
            self.sgrp_suf = sgrp_suf
            if hasattr(self, "sgrp_nr"):
                self.sgrp = str(self.sgrp_nr) + self.sgrp_suf
                if config.VERBOSITY >= config.INFO_ALL:
                    print(f"XU.materials: trying space group {self.sgrp}")

            # determine all unique positions for definition of a P1 space group
            symops = self.symops
            if not symops and hasattr(self, "sgrp") and self.atoms:
                label = sorted(
                    wyckpos.wp[self.sgrp], key=lambda s: int(s[:-1])
                )
                symops = wyckpos.wp[self.sgrp][label[-1]][1]
                if config.VERBOSITY >= config.INFO_ALL:
                    print(
                        "XU.materials: no symmetry operations in CIF-Dataset"
                        "; using built in general positions."
                    )
            self.unique_positions = []
            for el, (x, y, z), occ, biso in self.atoms:
                unique_pos = set()
                for symop in symops:
                    pos = eval(symop, {"x": x, "y": y, "z": z})
                    pos = numpy.asarray(pos)
                    # check that position is within unit cell
                    pos = pos - numpy.round(pos, self.digits) // 1
                    unique_pos.add(tuple(numpy.round(pos, self.digits)))
                self.unique_positions.append((el, unique_pos, occ, biso))

            # determine Wyckoff positions and free parameters of unit cell
            if hasattr(self, "sgrp"):
                self.wp = []
                self.occ = []
                self.elements = []
                self.biso = []
                keys = list(wyckpos.wp[self.sgrp])
                wpn = [int(re.sub(r"([a-zA-Z])", r"", k)) for k in keys]
                for i, (el, (x, y, z), occ, biso) in enumerate(self.atoms):
                    # candidate positions from number of unique atoms
                    natoms = len(self.unique_positions[i][1])
                    wpcand = []
                    for j, n in enumerate(wpn):
                        if n == natoms:
                            wpcand.append(
                                (keys[j], wyckpos.wp[self.sgrp][keys[j]])
                            )
                    for j, (k, wp) in enumerate(
                        sorted(wpcand, key=operator.itemgetter(1))
                    ):
                        parint, poslist, _ = wp
                        for positem in poslist:
                            foundwp, xyz = sgl.testwp(
                                parint, positem, (x, y, z), self.digits
                            )
                            if foundwp:
                                if xyz is None:
                                    self.wp.append(k)
                                else:
                                    self.wp.append((k, xyz))
                                self.elements.append(el)
                                self.occ.append(occ)
                                self.biso.append(biso)
                                break
                        if foundwp:
                            break
                if config.VERBOSITY >= config.INFO_ALL:
                    print(
                        f"XU.materials: {len(self.wp):d} of "
                        f"{len(self.atoms):d} Wyckoff positions identified"
                    )
                    if len(self.wp) < len(self.atoms):
                        print(
                            f"XU.materials: space group {self.sgrp} seems "
                            "not to fit"
                        )

                # free unit cell parameters
                self.crystal_system, _ = sgl.sgrp_sym[self.sgrp_nr]
                self.crystal_system += self.sgrp_suf
                self.uc_params = []
                p2i = {
                    "a": 0,
                    "b": 1,
                    "c": 2,
                    "alpha": 0,
                    "beta": 1,
                    "gamma": 2,
                }
                for pname in sgl.sgrp_params[self.crystal_system][0]:
                    if pname in ("a", "b", "c"):
                        self.uc_params.append(self.lattice_const[p2i[pname]])
                    if pname in ("alpha", "beta", "gamma"):
                        self.uc_params.append(self.lattice_angles[p2i[pname]])

                if len(self.wp) == len(self.atoms):
                    if config.VERBOSITY >= config.INFO_ALL:
                        print(
                            "XU.materials: identified space group as "
                            f"{self.sgrp}"
                        )
                    break



[docs]
    def SGLattice(self, use_p1=False):
        """
        create a SGLattice object with the structure from the CIF file
        """
        if not use_p1:
            if hasattr(self, "sgrp"):
                if len(self.wp) == len(self.atoms):
                    return sgl.SGLattice(
                        self.sgrp,
                        *self.uc_params,
                        atoms=self.elements,
                        pos=self.wp,
                        occ=self.occ,
                        b=self.biso,
                    )
                if config.VERBOSITY >= config.INFO_LOW:
                    print("XU.materials: Wyckoff positions missing, using P1")
            else:
                if config.VERBOSITY >= config.INFO_LOW:
                    print(
                        "XU.materials: space-group detection failed, using P1"
                    )

        atomdict = {"atoms": [], "pos": [], "occ": [], "b": []}
        for element, positions, o, b in self.unique_positions:
            for p in positions:
                atomdict["atoms"].append(element)
                atomdict["pos"].append(("1a", p))
                atomdict["occ"].append(o)
                atomdict["b"].append(b)

        return sgl.SGLattice(
            1,
            *itertools.chain(self.lattice_const, self.lattice_angles),
            **atomdict,
        )


    def __str__(self):
        """
        returns a string with positions and names of the atoms
        """
        ostr = ""
        ostr += "unit cell structure:"
        if hasattr(self, "sgrp"):
            ostr += " %s %s %s\n" % (
                self.sgrp,
                self.crystal_system,
                getattr(self, "sgrp_name", ""),
            )
        else:
            ostr += "\n"
        ostr += "a: %8.4f b: %8.4f c: %8.4f\n" % tuple(self.lattice_const)
        ostr += "alpha: %6.2f beta: %6.2f gamma: %6.2f\n" % tuple(
            self.lattice_angles
        )
        if self.unique_positions:
            ostr += "Unique atom positions in unit cell\n"
        for atom in self.unique_positions:
            ostr += atom[0].name + " (%d): \n" % atom[0].num
            for pos in atom[1]:
                ostr += str(numpy.round(pos, self.digits)) + "\n"
        return ostr




[docs]
def cifexport(filename, mat):
    """
    function to export a Crystal instance to CIF file. This in particular
    includes the atomic coordinates, however, ignores for example the elastic
    parameters.
    """

    def unique_label(basename, names):
        num = 1
        name = f"{basename}{num:d}"
        while name in names:
            num += 1
            name = f"{basename}{num:d}"
        return name

    general = """data_global
_chemical_formula_sum '{chemsum}'
_cell_length_a {a:.5f}
_cell_length_b {b:.5f}
_cell_length_c {c:.5f}
_cell_angle_alpha {alpha:.4f}
_cell_angle_beta {beta:.4f}
_cell_angle_gamma {gamma:.4f}
_cell_volume {vol:.3f}
_space_group_crystal_system {csystem}
_space_group_IT_number {sgrpnr}
_space_group_name_H-M_alt '{hmsymb}'
"""

    csystem = mat.lattice.crystal_system
    if len(mat.lattice.space_group_suf) > 0:
        csystem = csystem[: -len(mat.lattice.space_group_suf)]

    ctxt = general.format(
        chemsum=mat.chemical_composition(with_spaces=True),
        a=mat.a,
        b=mat.b,
        c=mat.c,
        alpha=mat.alpha,
        beta=mat.beta,
        gamma=mat.gamma,
        vol=mat.lattice.UnitCellVolume(),
        csystem=csystem,
        sgrpnr=mat.lattice.space_group_nr,
        hmsymb=mat.lattice.name,
    )

    sgrpsuf = mat.lattice.space_group_suf[1:]
    if sgrpsuf:
        ctxt += f"_symmetry_cell_setting {sgrpsuf}\n"

    symloop = """
loop_
_space_group_symop_operation_xyz
"""

    for symop in mat.lattice.symops:
        symloop += "'" + symop.xyz() + "'\n"

    atomloop = """
loop_
_atom_site_label
_atom_site_type_symbol
_atom_site_symmetry_multiplicity
_atom_site_Wyckoff_symbol
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
_atom_site_B_iso_or_equiv
"""
    nidx = 0
    allatoms = list(mat.lattice.base())
    names = []
    for at, pos, occ, b in mat.lattice._wbase:
        wm, wl, _ = re.split("([a-z])", pos[0])
        nsite = int(wm)
        x, y, z = allatoms[nidx][1]
        names.append(unique_label(at.name, names))
        atomloop += "%s %s %d %c %.5f %.5f %.5f %.4f %.4f\n" % (
            names[-1],
            at.name,
            nsite,
            wl,
            x,
            y,
            z,
            occ,
            b,
        )
        nidx += nsite

    with open(filename, "w") as f:
        f.write(ctxt)
        f.write(symloop)
        f.write(atomloop)