# This file is part of xrayutilities.
#
# xrayutilities is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# Copyright (c) 2010-2021, 2023 Dominik Kriegner <dominik.kriegner@gmail.com>
import copy
import io
import itertools
import operator
import os
import re
import shlex
import numpy
from .. import config
from . import elements
from . import spacegrouplattice as sgl
from . import wyckpos
re_data = re.compile(r"^data_", re.IGNORECASE)
re_loop = re.compile(r"^loop_", re.IGNORECASE)
re_symop = re.compile(r"^\s*("
"_space_group_symop_operation_xyz|"
"_symmetry_equiv_pos_as_xyz)", re.IGNORECASE)
re_name = re.compile(r"^\s*_chemical_formula_sum", re.IGNORECASE)
re_atom = re.compile(r"^\s*_atom_site_label\s*$", re.IGNORECASE)
re_atomtyp = re.compile(r"^\s*_atom_site_type_symbol\s*$", re.IGNORECASE)
re_atomx = re.compile(r"^\s*_atom_site_fract_x", re.IGNORECASE)
re_atomy = re.compile(r"^\s*_atom_site_fract_y", re.IGNORECASE)
re_atomz = re.compile(r"^\s*_atom_site_fract_z", re.IGNORECASE)
re_uiso = re.compile(r"^\s*_atom_site_U_iso_or_equiv", re.IGNORECASE)
re_biso = re.compile(r"^\s*_atom_site_B_iso_or_equiv", re.IGNORECASE)
re_atomocc = re.compile(r"^\s*_atom_site_occupancy", re.IGNORECASE)
re_labelline = re.compile(r"^\s*_")
re_emptyline = re.compile(r"^\s*$")
re_quote = re.compile(r"'")
re_spacegroupnr = re.compile(r"^\s*(_space_group_IT_number|"
"_symmetry_Int_Tables_number)", re.IGNORECASE)
re_spacegroupname = re.compile(r"^\s*(_symmetry_space_group_name_H-M|"
"_space_group_name_H-M_alt)",
re.IGNORECASE)
re_spacegroupsetting = re.compile(r"^\s*_symmetry_cell_setting", re.IGNORECASE)
re_cell_a = re.compile(r"^\s*_cell_length_a", re.IGNORECASE)
re_cell_b = re.compile(r"^\s*_cell_length_b", re.IGNORECASE)
re_cell_c = re.compile(r"^\s*_cell_length_c", re.IGNORECASE)
re_cell_alpha = re.compile(r"^\s*_cell_angle_alpha", re.IGNORECASE)
re_cell_beta = re.compile(r"^\s*_cell_angle_beta", re.IGNORECASE)
re_cell_gamma = re.compile(r"^\s*_cell_angle_gamma", re.IGNORECASE)
re_comment = re.compile(r"^\s*#")
[docs]
class CIFFile:
"""
class for parsing CIF (Crystallographic Information File) files. The class
aims to provide an additional way of creating material classes instead of
manual entering of the information the lattice constants and unit cell
structure are parsed from the CIF file.
If multiple datasets are present in the CIF file this class will attempt to
parse all of them into the the data dictionary. By default all methods
access the first data set found in the file.
"""
[docs]
def __init__(self, filestr, digits=4):
"""
initialization of the CIFFile class
Parameters
----------
filestr : str, bytes
CIF filename or string representation of the CIF file
digits : int, optional
number of digits to check if position is unique
"""
self.digits = digits
if os.path.isfile(filestr):
self.filename = filestr
try:
fid = open(self.filename, "rb")
except OSError as exc:
raise IOError(f"cannot open CIF file {self.filename}") from exc
else:
if filestr.count('\n') == 0:
print('XU.materials.CIFFile: "filestr" contains only one line '
'but a file with that name does not exist! Continuing '
'with the assumption this one line string is the '
'content of a CIF file!')
self.filename = '__from_str__'
if isinstance(filestr, bytes):
fid = io.BytesIO(filestr)
else:
fid = io.BytesIO(bytes(filestr.encode('ascii')))
if config.VERBOSITY >= config.INFO_ALL:
print(f'XU.materials: parsing CIF file {self.filename}')
self.default_dataset = None
self.data = {}
self.Parse(fid)
fid.close()
[docs]
def Parse(self, fid):
"""
function to parse a CIF file. The function reads all the included data
sets and adds them to the data dictionary.
"""
fidpos = fid.tell()
while True:
line = fid.readline()
if not line:
break
fidpos = fid.tell()
line = line.decode('ascii', 'ignore')
m = re_data.match(line)
if m:
fid.seek(fidpos)
name = line[m.end():].strip()
self.data[name] = CIFDataset(fid, name, self.digits)
if self.data[name].has_atoms and not self.default_dataset:
self.default_dataset = name
[docs]
def SGLattice(self, dataset=None, use_p1=False):
"""
create a SGLattice object with the structure from the CIF dataset
Parameters
----------
dataset : str, optional
name of the dataset to use. if None the default one will be used.
use_p1 : bool, optional
force the use of P1 symmetry, default False
"""
if not dataset:
dataset = self.default_dataset
return self.data[dataset].SGLattice(use_p1=use_p1)
def __str__(self):
"""
returns a string with positions and names of the atoms for all datasets
"""
ostr = ""
ostr += f"CIF-File: {self.filename}\n"
for name, dataset in self.data.items():
ostr += f"\nDataset: {name}"
if name == self.default_dataset:
ostr += " (default)"
ostr += "\n"
ostr += str(dataset)
return ostr
[docs]
class CIFDataset:
"""
class for parsing CIF (Crystallographic Information File) files. The class
aims to provide an additional way of creating material classes instead of
manual entering of the information the lattice constants and unit cell
structure are parsed from the CIF file
"""
[docs]
def __init__(self, fid, name, digits):
"""
initialization of the CIFDataset class. This class parses one data
block.
Parameters
----------
fid : filehandle
file handle set to the beginning of the data block to be parsed
name : str
identifier string of the dataset
digits : int
number of digits to check if position is unique
"""
self.name = name
self.digits = digits
self.has_atoms = False
if config.VERBOSITY >= config.INFO_ALL:
print(f'XU.materials: parsing cif dataset {self.name}')
self.Parse(fid)
self.SymStruct()
[docs]
def Parse(self, fid):
"""
function to parse a CIF data set. The function reads the
space group symmetry operations and the basic atom positions
as well as the lattice constants and unit cell angles
"""
self.symops = []
self.atoms = []
self.lattice_const = numpy.zeros(3, dtype=numpy.double)
self.lattice_angles = numpy.zeros(3, dtype=numpy.double)
loop_start = False
symop_loop = False
atom_loop = False
def get_element(cifstring):
el = re.sub(r"['\"]", r"", cifstring)
if '+' in el or '-' in el:
# add oxidation number if not present
for sign in ('+', '-'):
if sign in el:
if not el[el.index(sign)-1].isdigit():
signidx = el.index(sign)
el = el[:signidx] + '1' + el[signidx:]
# replace special characters
for r, o in zip(('dot', 'p', 'm'), ('.', '+', '-')):
el = el.replace(o, r)
else:
el = re.sub(r"([0-9])", r"", el)
el = re.sub(r"\(\w*\)", r"", el)
try:
element = getattr(elements, el)
except AttributeError: # el not found, typ. due to oxidation state
f = re.search('[0-9]', el)
if not f and el == '?':
element = elements.Dummy
elif f is None:
raise ValueError("XU.materials: element ('%s') could not"
" be identified as chemical element. Only"
" abbreviations of element names are "
"supported." % (cifstring))
else:
elname = el[:f.start()]
if hasattr(elements, elname):
# here one might want to find a closer alternative than
# the neutral atom, but the effect this has should be
# minimal, currently simply the neutral atom is used
if config.VERBOSITY >= config.INFO_LOW:
print(f'XU.materials: Warning: element {elname} '
f'used instead of {cifstring}')
element = getattr(elements, elname)
else:
raise ValueError("XU.materials: element ('%s') could "
"not be found" % (cifstring))
return element
def floatconv(string):
"""
helper function to convert string with possible error
given in brackets to float
"""
try:
f = float(re.sub(r"\(.+\)", r"", string))
except ValueError:
f = numpy.nan
return f
def intconv(string):
"""
helper function to convert string to integer
"""
try:
i = int(string)
except ValueError:
i = None
return i
fidpos = fid.tell()
for line in fid.readlines():
linelen = len(line)
line = line.decode('ascii', 'ignore')
if config.VERBOSITY >= config.DEBUG:
print(line)
print(fid.tell(), fidpos)
if re_data.match(line):
fid.seek(fidpos)
break
fidpos += linelen
# ignore comment lines
if re_comment.match(line):
continue
if re_loop.match(line): # start of loop
if config.VERBOSITY >= config.DEBUG:
print('XU.materials: loop start found')
loop_start = True
loop_labels = []
symop_loop = False
symop_idx = None
atom_loop = False
alab_idx = None
ax_idx = None
ay_idx = None
az_idx = None
uiso_idx = None
biso_idx = None
occ_idx = None
elif re_labelline.match(line):
if re_cell_a.match(line):
self.lattice_const[0] = floatconv(line.split()[1])
elif re_cell_b.match(line):
self.lattice_const[1] = floatconv(line.split()[1])
elif re_cell_c.match(line):
self.lattice_const[2] = floatconv(line.split()[1])
elif re_cell_alpha.match(line):
self.lattice_angles[0] = floatconv(line.split()[1])
elif re_cell_beta.match(line):
self.lattice_angles[1] = floatconv(line.split()[1])
elif re_cell_gamma.match(line):
self.lattice_angles[2] = floatconv(line.split()[1])
elif re_spacegroupnr.match(line):
i = intconv(line.split()[1])
if i:
self.sgrp_nr = i
elif re_spacegroupname.match(line):
self.sgrp_name = ''.join(line.split()[1:]).strip("'")
elif re_spacegroupsetting.match(line):
i = intconv(line.split()[1])
if i:
self.sgrp_setting = i
elif re_name.match(line):
try:
self.name = shlex.split(line)[1]
except IndexError:
self.name = None
if loop_start:
loop_labels.append(line.strip())
if re_symop.match(line): # start of symmetry op. loop
if config.VERBOSITY >= config.DEBUG:
print('XU.materials: symop-loop identified')
symop_loop = True
symop_idx = len(loop_labels) - 1
elif re_atom.match(line) or re_atomtyp.match(line):
# start of atom position loop
if config.VERBOSITY >= config.DEBUG:
print('XU.materials: atom position-loop found')
atom_loop = True
if re_atomtyp.match(line):
alab_idx = len(loop_labels) - 1
elif not list(filter(re_atomtyp.match, loop_labels)):
# ensure precedence of atom_site_type_symbol
alab_idx = len(loop_labels) - 1
elif re_atomx.match(line):
ax_idx = len(loop_labels) - 1
if config.VERBOSITY >= config.DEBUG:
print('XU.materials: atom position x: col%d'
% ax_idx)
elif re_atomy.match(line):
ay_idx = len(loop_labels) - 1
elif re_atomz.match(line):
az_idx = len(loop_labels) - 1
elif re_uiso.match(line):
uiso_idx = len(loop_labels) - 1
elif re_biso.match(line):
biso_idx = len(loop_labels) - 1
elif re_atomocc.match(line):
occ_idx = len(loop_labels) - 1
elif re_emptyline.match(line):
loop_start = False
symop_loop = False
atom_loop = False
continue
elif symop_loop: # symmetry operation entry
loop_start = False
entry = shlex.split(line)[symop_idx]
if re_quote.match(entry):
opstr = entry
else:
opstr = "'" + entry + "'"
opstr = re.sub(r"^'", r"(", opstr)
opstr = re.sub(r"'$", r")", opstr)
self.symops.append(opstr)
elif atom_loop: # atom label and position
loop_start = False
asplit = line.split()
try:
atom = get_element(asplit[alab_idx])
apos = (floatconv(asplit[ax_idx]),
floatconv(asplit[ay_idx]),
floatconv(asplit[az_idx]))
occ = floatconv(asplit[occ_idx]) if occ_idx else 1
if numpy.isnan(occ):
occ = 1
uiso = floatconv(asplit[uiso_idx]) if uiso_idx else 0
biso = floatconv(asplit[biso_idx]) if biso_idx else 0
if numpy.isnan(uiso):
uiso = 0
if numpy.isnan(biso):
biso = 0
if biso == 0:
biso = 8 * numpy.pi**2 * uiso
self.atoms.append((atom, apos, occ, biso))
except IndexError:
if config.VERBOSITY >= config.INFO_LOW:
print('XU.materials: could not parse atom line: "%s"'
% line.strip())
if self.atoms:
self.has_atoms = True
[docs]
def SymStruct(self):
"""
function to obtain the list of different atom positions in the unit
cell for the different types of atoms and determine the space group
number and origin choice if available. The data are obtained from the
data parsed from the CIF file.
"""
def rem_white(string):
return string.replace(' ', '')
if hasattr(self, 'sgrp_name'):
# determine spacegroup
cifsgn = rem_white(self.sgrp_name).split(':')[0]
for nr, name in sgl.sgrp_name.items():
if cifsgn == rem_white(name):
self.sgrp_nr = int(nr)
if not hasattr(self, 'sgrp_nr'):
# try ignoring the minuses
for nr, name in sgl.sgrp_name.items():
if cifsgn == rem_white(name.replace('-', '')):
self.sgrp_nr = int(nr)
if len(self.sgrp_name.split(':')) > 1:
self.sgrp_suf = ':' + self.sgrp_name.split(':')[1]
elif hasattr(self, 'sgrp_setting'):
self.sgrp_suf = ':%d' % self.sgrp_setting
if not hasattr(self, 'sgrp_suf'):
if hasattr(self, 'sgrp_nr'):
self.sgrp_suf = sgl.get_possible_sgrp_suf(self.sgrp_nr)
else:
self.sgrp_suf = ''
if isinstance(self.sgrp_suf, str):
suffixes = [self.sgrp_suf, ]
else:
suffixes = copy.copy(self.sgrp_suf)
for sgrp_suf in suffixes:
self.sgrp_suf = sgrp_suf
if hasattr(self, 'sgrp_nr'):
self.sgrp = str(self.sgrp_nr) + self.sgrp_suf
if config.VERBOSITY >= config.INFO_ALL:
print(f'XU.materials: trying space group {self.sgrp}')
# determine all unique positions for definition of a P1 space group
symops = self.symops
if not symops and hasattr(self, 'sgrp') and self.atoms:
label = sorted(wyckpos.wp[self.sgrp],
key=lambda s: int(s[:-1]))
symops = wyckpos.wp[self.sgrp][label[-1]][1]
if config.VERBOSITY >= config.INFO_ALL:
print('XU.materials: no symmetry operations in CIF-Dataset'
'; using built in general positions.')
self.unique_positions = []
for el, (x, y, z), occ, biso in self.atoms:
unique_pos = set()
for symop in symops:
pos = eval(symop, {'x': x, 'y': y, 'z': z})
pos = numpy.asarray(pos)
# check that position is within unit cell
pos = pos - numpy.round(pos, self.digits) // 1
unique_pos.add(tuple(numpy.round(pos, self.digits)))
self.unique_positions.append((el, unique_pos, occ, biso))
# determine Wyckoff positions and free parameters of unit cell
if hasattr(self, 'sgrp'):
self.wp = []
self.occ = []
self.elements = []
self.biso = []
keys = list(wyckpos.wp[self.sgrp])
wpn = [int(re.sub(r"([a-zA-Z])", r"", k)) for k in keys]
for i, (el, (x, y, z), occ, biso) in enumerate(self.atoms):
# candidate positions from number of unique atoms
natoms = len(self.unique_positions[i][1])
wpcand = []
for j, n in enumerate(wpn):
if n == natoms:
wpcand.append((keys[j],
wyckpos.wp[self.sgrp][keys[j]]))
for j, (k, wp) in enumerate(
sorted(wpcand, key=operator.itemgetter(1))):
parint, poslist, _ = wp
for positem in poslist:
foundwp, xyz = sgl.testwp(parint, positem,
(x, y, z), self.digits)
if foundwp:
if xyz is None:
self.wp.append(k)
else:
self.wp.append((k, xyz))
self.elements.append(el)
self.occ.append(occ)
self.biso.append(biso)
break
if foundwp:
break
if config.VERBOSITY >= config.INFO_ALL:
print(f"XU.materials: {len(self.wp):d} of "
f"{len(self.atoms):d} Wyckoff positions identified")
if len(self.wp) < len(self.atoms):
print(f"XU.materials: space group {self.sgrp} seems "
"not to fit")
# free unit cell parameters
self.crystal_system, _ = sgl.sgrp_sym[self.sgrp_nr]
self.crystal_system += self.sgrp_suf
self.uc_params = []
p2i = {'a': 0, 'b': 1, 'c': 2,
'alpha': 0, 'beta': 1, 'gamma': 2}
for pname in sgl.sgrp_params[self.crystal_system][0]:
if pname in ('a', 'b', 'c'):
self.uc_params.append(self.lattice_const[p2i[pname]])
if pname in ('alpha', 'beta', 'gamma'):
self.uc_params.append(self.lattice_angles[p2i[pname]])
if len(self.wp) == len(self.atoms):
if config.VERBOSITY >= config.INFO_ALL:
print("XU.materials: identified space group as "
f"{self.sgrp}")
break
[docs]
def SGLattice(self, use_p1=False):
"""
create a SGLattice object with the structure from the CIF file
"""
if not use_p1:
if hasattr(self, 'sgrp'):
if len(self.wp) == len(self.atoms):
return sgl.SGLattice(self.sgrp, *self.uc_params,
atoms=self.elements, pos=self.wp,
occ=self.occ, b=self.biso)
if config.VERBOSITY >= config.INFO_LOW:
print('XU.materials: Wyckoff positions missing, '
'using P1')
else:
if config.VERBOSITY >= config.INFO_LOW:
print('XU.materials: space-group detection failed, '
'using P1')
atomdict = {'atoms': [], 'pos': [], 'occ': [], 'b': []}
for element, positions, o, b in self.unique_positions:
for p in positions:
atomdict['atoms'].append(element)
atomdict['pos'].append(('1a', p))
atomdict['occ'].append(o)
atomdict['b'].append(b)
return sgl.SGLattice(1, *itertools.chain(self.lattice_const,
self.lattice_angles),
**atomdict)
def __str__(self):
"""
returns a string with positions and names of the atoms
"""
ostr = ""
ostr += "unit cell structure:"
if hasattr(self, 'sgrp'):
ostr += " %s %s %s\n" % (self.sgrp, self.crystal_system,
getattr(self, 'sgrp_name', ''))
else:
ostr += "\n"
ostr += "a: %8.4f b: %8.4f c: %8.4f\n" % tuple(self.lattice_const)
ostr += "alpha: %6.2f beta: %6.2f gamma: %6.2f\n" % tuple(
self.lattice_angles)
if self.unique_positions:
ostr += "Unique atom positions in unit cell\n"
for atom in self.unique_positions:
ostr += atom[0].name + " (%d): \n" % atom[0].num
for pos in atom[1]:
ostr += str(numpy.round(pos, self.digits)) + "\n"
return ostr
[docs]
def cifexport(filename, mat):
"""
function to export a Crystal instance to CIF file. This in particular
includes the atomic coordinates, however, ignores for example the elastic
parameters.
"""
def unique_label(basename, names):
num = 1
name = f'{basename}{num:d}'
while name in names:
num += 1
name = f'{basename}{num:d}'
return name
general = """data_global
_chemical_formula_sum '{chemsum}'
_cell_length_a {a:.5f}
_cell_length_b {b:.5f}
_cell_length_c {c:.5f}
_cell_angle_alpha {alpha:.4f}
_cell_angle_beta {beta:.4f}
_cell_angle_gamma {gamma:.4f}
_cell_volume {vol:.3f}
_space_group_crystal_system {csystem}
_space_group_IT_number {sgrpnr}
_space_group_name_H-M_alt '{hmsymb}'
"""
csystem = mat.lattice.crystal_system
if len(mat.lattice.space_group_suf) > 0:
csystem = csystem[:-len(mat.lattice.space_group_suf)]
ctxt = general.format(chemsum=mat.chemical_composition(with_spaces=True),
a=mat.a, b=mat.b, c=mat.c,
alpha=mat.alpha, beta=mat.beta, gamma=mat.gamma,
vol=mat.lattice.UnitCellVolume(),
csystem=csystem,
sgrpnr=mat.lattice.space_group_nr,
hmsymb=mat.lattice.name)
sgrpsuf = mat.lattice.space_group_suf[1:]
if sgrpsuf:
ctxt += f'_symmetry_cell_setting {sgrpsuf}\n'
symloop = """
loop_
_space_group_symop_operation_xyz
"""
for symop in mat.lattice.symops:
symloop += "'" + symop.xyz() + "'\n"
atomloop = """
loop_
_atom_site_label
_atom_site_type_symbol
_atom_site_symmetry_multiplicity
_atom_site_Wyckoff_symbol
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
_atom_site_occupancy
_atom_site_B_iso_or_equiv
"""
nidx = 0
allatoms = list(mat.lattice.base())
names = []
for at, pos, occ, b in mat.lattice._wbase:
wm, wl, _ = re.split('([a-z])', pos[0])
nsite = int(wm)
x, y, z = allatoms[nidx][1]
names.append(unique_label(at.name, names))
atomloop += '%s %s %d %c %.5f %.5f %.5f %.4f %.4f\n' % (
names[-1], at.name, nsite, wl, x, y, z, occ, b)
nidx += nsite
with open(filename, 'w') as f:
f.write(ctxt)
f.write(symloop)
f.write(atomloop)