# Source code for xrayutilities.io.edf

# This file is part of xrayutilities.
#
# xrayutilities is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# Copyright (C) 2009-2010 Eugen Wintersberger <eugen.wintersberger@desy.de>
# Copyright (C) 2010-2012,2014-2019
#               Dominik Kriegner <dominik.kriegner@gmail.com>
# Copyright (C) 2012 Tanja Etzelstorfer <tanja.etzelstorfer@jku.at>

# module for handling files stored in the EDF data format developed by the ESRF

import os.path
import re
import struct

import numpy

from .. import config, utilities
from .filedir import FileDirectory
from .helper import xu_h5open, xu_open

# Pre-compiled patterns for the ASCII header of an EDF file.
# "=" together with any whitespace around it separates key and value
edf_kv_split = re.compile(r"\s*=\s*")  # key/value separator for header data
edf_eokv = re.compile(r";")  # terminator (";") of a header entry
# regular expressions for several ASCII representations of numbers
edf_integer_value = re.compile(r"\d+")
edf_float_value = re.compile(r"[+-]*\d+\.*\d*")
edf_float_e_value = re.compile(r"[+-]*\d+\.\d*e[+-]*\d*")
# matches names beginning with a digit; EDFFile.Save2HDF5 prefixes such
# dataset names with "ccd_"
edf_name_start_num = re.compile(r"^\d")

# dictionary mapping EDF data type keywords onto struct data types
#
# The format characters are used both with struct.calcsize (to compute the
# size of a data block) and as numpy dtype codes (to decode it), so each one
# must have the same width on every platform.
#
# SignedLong and UnsignedLong are only 4 bytes wide in EDF files. The struct
# codes "l"/"L" use the native C long, which is 8 bytes on 64bit Unix systems
# and caused troubles there; "i"/"I" are 4 bytes and work.
DataTypeDict = {"SignedByte": "b",
                "SignedShort": "h",
                "SignedInteger": "i",
                "SignedLong": "i",
                "FloatValue": "f",
                "DoubleValue": "d",
                "UnsignedByte": "B",
                "UnsignedShort": "H",
                "UnsignedInt": "I",
                "UnsignedInteger": "I",
                "UnsignedLong": "I"}


class EDFFile:
    """
    Reader for image files stored in the EDF data format.

    The file is scanned once on construction (see `Parse`) to collect the
    header and the data offset of every image it contains. The binary image
    data are only read on demand via `ReadData` or the `data` property.

    If the file was opened with ``keep_open=True`` the handle should be
    released with `close` (or by using the object as a context manager).
    """

    def __init__(self, fname, nxkey="Dim_1", nykey="Dim_2",
                 dtkey="DataType", path="", header=True, keep_open=False):
        """
        Parameters
        ----------
        fname : str
            name of the EDF file of type .edf or .edf.gz
        nxkey : str, optional
            name of the header key that holds the number of points in
            x-direction
        nykey : str, optional
            name of the header key that holds the number of points in
            y-direction
        dtkey : str, optional
            name of the header key that holds the datatype for the binary
            data
        path : str, optional
            path to the EDF file
        header : bool, optional
            has header (default true)
        keep_open : bool, optional
            if True the file handle is kept open between multiple calls
            which can cause significant speed-ups
        """
        self.filename = fname
        self.full_filename = os.path.join(path, fname)

        # evaluate keyword arguments
        self.nxkey = nxkey
        self.nykey = nykey
        self.dtkey = dtkey
        self.headerflag = header

        # create attributes for holding data; the lists hold one entry per
        # image found in the file
        self._data = {}
        self._headers = []
        self._data_offsets = []
        self._data_read = False
        self._dimx = []
        self._dimy = []
        self._byte_order = []
        self._fmt_str = []
        self._dtype = []

        self.Parse()
        if keep_open:
            self.fid = xu_open(self.full_filename, 'rb')
        else:
            self.fid = None
        self.nimages = len(self._data_offsets)
        self.header = self._headers[0]

    def close(self):
        """
        Close the file handle kept open by ``keep_open=True`` (if any).

        Afterwards the data can still be read; the file is then reopened
        for every call of `ReadData`.
        """
        if self.fid:
            self.fid.close()
        self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    @staticmethod
    def _read_header(fid, debug=False):
        """
        Read the next header block ("{" ... "}") from an open EDF file.

        Parameters
        ----------
        fid : iterable of bytes
            file object opened in binary mode, positioned in front of the
            header to read
        debug : bool, optional
            if True every decoded line is printed

        Returns
        -------
        header : dict or None
            key/value strings of the header, or None if the end of the
            file was reached before a closing "}" was found
        nbytes : int
            number of bytes consumed from `fid`
        """
        header = None  # None until the opening "{" was seen
        nbytes = 0
        key = value = None
        multiline = False  # True while a value continues on the next line

        for raw in fid:
            nbytes += len(raw)
            line = raw.decode('ascii', 'ignore')
            if debug:
                print(line)
            # remove leading and trailing whitespace symbols
            line = line.strip()

            if header is None:
                # everything in front of the opening brace is skipped
                if line == "{":
                    header = {}
                continue

            if line == "}":
                # the binary data start right after this line
                return header, nbytes
            if not line:
                continue

            if multiline:
                value += line
            else:
                key, sep, value = line.partition("=")
                if not sep:
                    # not a "key = value" entry: report and skip it instead
                    # of reusing the key/value of the previous entry
                    print(f"XU.io.EDFFile.Parse: line: {line}")
                    continue
                key = key.strip()
                value = value.strip()

            # an entry is complete once its value ends with ";", otherwise
            # the value extends over multiple lines (endswith also copes
            # with an empty value)
            if value.endswith(";"):
                header[key] = value[:-1].strip()
                multiline = False
            else:
                multiline = True

        return None, nbytes

    def Parse(self):
        """
        Parse file to find the number of entries and read the respective
        header information
        """
        header = {}
        offset = 0

        with xu_open(self.full_filename, 'rb') as fid:
            if config.VERBOSITY >= config.INFO_ALL:
                print(f"XU.io.EDFFile.Parse: file: {self.full_filename}")

            if self.headerflag:
                debug = config.VERBOSITY >= config.DEBUG
                while True:  # until end of file
                    found, nbytes = self._read_header(fid, debug)
                    offset += nbytes
                    if found is None:
                        break
                    header = found

                    # append header to class variables
                    self._byte_order.append(header["ByteOrder"])
                    self._fmt_str.append(DataTypeDict[header[self.dtkey]])
                    self._dimx.append(int(header[self.nxkey]))
                    self._dimy.append(int(header[self.nykey]))
                    self._dtype.append(header[self.dtkey])
                    self._headers.append(header)
                    # offset now points to the start of the binary data
                    self._data_offsets.append(offset)

                    # jump over data block
                    tot_nofp = self._dimx[-1] * self._dimy[-1]
                    dsize = tot_nofp * struct.calcsize(self._fmt_str[-1])
                    fid.seek(offset + dsize, 0)
                    offset += dsize

            else:  # in case of no header also save one set of defaults
                self._byte_order.append('LowByteFirst')
                self._fmt_str.append(DataTypeDict['UnsignedShort'])
                self._dimx.append(516)
                self._dimy.append(516)
                self._dtype.append('UnsignedShort')
                self._headers.append(header)
                self._data_offsets.append(offset)

        # try to parse motor positions and counters from last found header
        # into separate dictionary; a missing or non-numeric value list only
        # triggers a warning
        if 'motor_mne' in header:
            tkeys = header['motor_mne'].split()
            try:
                tval = numpy.array(header['motor_pos'].split(),
                                   dtype=numpy.double)
                self.motors = dict(zip(tkeys, tval))
            except (KeyError, ValueError):
                print("XU.io.EDFFile.ReadData: Warning: header conversion "
                      "of motor positions failed")

        if 'counter_mne' in header:
            tkeys = header['counter_mne'].split()
            try:
                tval = numpy.array(header['counter_pos'].split(),
                                   dtype=numpy.double)
                self.counters = dict(zip(tkeys, tval))
            except (KeyError, ValueError):
                print("XU.io.EDFFile.ReadData: Warning: header conversion "
                      "of counter values failed")

    def ReadData(self, nimg=0):
        """
        Read the CCD data of the specified image and return the data
        this function is called automatically when the 'data' property is
        accessed, but can also be called manually when only a certain image
        from the file is needed.

        Parameters
        ----------
        nimg : int, optional
            number of the image which should be read (starts with 0)

        Returns
        -------
        numpy.ndarray
            image data with shape (number of points in y, number of points
            in x)

        Raises
        ------
        IOError
            if the amount of data read does not fit the data type
        """
        tot_nofp = self._dimx[nimg] * self._dimy[nimg]
        fmt_str = self._fmt_str[nimg]
        nbytes = tot_nofp * struct.calcsize(fmt_str)

        # move to the data section - jump over the header - and read the
        # data, either from the handle kept open or from a temporary one
        if self.fid:
            self.fid.seek(self._data_offsets[nimg], 0)
            bindata = self.fid.read(nbytes)
        else:
            with xu_open(self.full_filename, 'rb') as binfid:
                binfid.seek(self._data_offsets[nimg], 0)
                bindata = binfid.read(nbytes)

        if config.VERBOSITY >= config.DEBUG:
            print("XU.io.EDFFile: read binary data: nofp: %d len: %d"
                  % (tot_nofp, len(bindata)))
            print(f"XU.io.EDFFile: format: {fmt_str}")

        # 'L' is retried as 'I' since the native long can be wider than the
        # data stored in the file
        formats = (fmt_str, 'I') if fmt_str == 'L' else (fmt_str,)
        data = None
        failure = None
        for fmt_str in formats:
            try:
                data = numpy.frombuffer(bindata, count=tot_nofp,
                                        dtype=fmt_str)
            except ValueError as err:
                failure = err
            else:
                break
        if data is None:
            raise IOError("XU.io.EDFFile: data format (%s) has different "
                          "byte-length, from amount of data one expects "
                          "%d bytes per entry"
                          % (fmt_str, len(bindata) / tot_nofp)) from failure

        data = data.reshape(self._dimy[nimg], self._dimx[nimg])

        # the data are not byte-swapped here, the user is only warned
        if self._byte_order[nimg] != "LowByteFirst":
            print("XU.io.EDFFile.ReadData: check byte order - "
                  "not low byte first")

        return data

    @property
    def data(self):
        """
        Image data of the file, read on first access and cached afterwards.

        For a file with a single image the array itself is returned,
        otherwise a dictionary mapping the image number to its array.
        """
        if not self._data_read:
            for i in range(self.nimages):
                self._data[i] = self.ReadData(i)
            self._data_read = True
        if self.nimages == 1:
            return self._data[0]
        return self._data

    def Save2HDF5(self, h5f, group="/", comp=True):
        """
        Saves the data stored in the EDF file in a HDF5 file as a HDF5 array.
        By default the data is stored in the root group of the HDF5 file -
        this can be changed by passing the name of a target group or a path
        to the target group via the "group" keyword argument.

        Parameters
        ----------
        h5f : file-handle or str
            a HDF5 file object or name
        group : str, optional
            group where to store the data (default to the root of the file)
        comp : bool, optional
            activate compression - true by default
        """
        with xu_h5open(h5f, 'a') as h5:
            if isinstance(group, str):
                if group == '/':
                    g = h5
                else:
                    # an already existing group of that name is replaced
                    if group in h5:
                        del h5[group]
                    g = h5.create_group(group)
            else:
                g = group

            # create the array name
            ca_name = os.path.split(self.filename)[-1]
            ca_name = os.path.splitext(ca_name)[0]
            # perform a second time for case of .edf.gz files
            ca_name = os.path.splitext(ca_name)[0]
            ca_name = utilities.makeNaturalName(ca_name)
            if edf_name_start_num.match(ca_name):
                ca_name = "ccd_" + ca_name
            if config.VERBOSITY >= config.INFO_ALL:
                print(ca_name)

            # create the array description
            ca_desc = f"EDF CCD data from file {self.filename} "

            kwds = {'fletcher32': True}
            if comp:
                kwds['compression'] = 'gzip'

            # with several images the image number is appended to the name
            if self.nimages != 1:
                ca_name += '_{n:04d}'

            for n in range(self.nimages):
                d = self.ReadData(n)
                name = ca_name.format(n=n)
                try:
                    ca = g.create_dataset(name, data=d, **kwds)
                except ValueError:
                    # dataset exists already: replace it
                    del g[name]
                    ca = g.create_dataset(name, data=d, **kwds)

                ca.attrs['TITLE'] = ca_desc

                # finally we have to append the attributes
                for k in self.header:
                    ca.attrs[utilities.makeNaturalName(k)] = self.header[k]
class EDFDirectory(FileDirectory):
    """
    Parses a directory for EDF files, which can be stored to a HDF5 file for
    further usage. Every file is handled by the EDFFile class.
    """

    def __init__(self, datapath, ext="edf", **keyargs):
        """
        Parameters
        ----------
        datapath : str
            directory of the EDF file
        ext : str, optional
            extension of the ccd files in the datapath (default: "edf")
        keyargs : dict, optional
            further keyword arguments are passed to EDFFile
        """
        super().__init__(datapath, ext, EDFFile, **keyargs)