Source code for xrayutilities.math.fit

# This file is part of xrayutilities.
#
# xrayutilities is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# Copyright (c) 2012-2021, 2023 Dominik Kriegner <dominik.kriegner@gmail.com>
"""
module with a function wrapper to scipy.optimize.leastsq
for fitting of a 2D function to a peak or a 1D Gauss fit with
the odr package
"""

import time
import warnings

import numpy
import scipy.optimize as optimize
from scipy import odr

from .. import config, utilities
from ..exception import InputError
from .functions import (Gauss1d, Gauss1d_der_p, Gauss1d_der_x, Lorentz1d,
                        Lorentz1d_der_p, Lorentz1d_der_x, PseudoVoigt1d,
                        PseudoVoigt1d_der_p, PseudoVoigt1d_der_x,
                        PseudoVoigt1dasym, PseudoVoigt1dasym2)
from .misc import center_of_mass, fwhm_exp


[docs] def linregress(x, y): """ fast linregress to avoid usage of scipy.stats which is slow! NaN values in y are ignored by this function. Parameters ---------- x, y : array-like data coordinates and values Returns ------- p : tuple parameters of the linear fit (slope, offset) rsq: float R^2 value Examples -------- >>> (k, d), R2 = linregress([1, 2, 3], [3.3, 4.1, 5.05]) """ x = numpy.asarray(x) y = numpy.asarray(y) mask = numpy.logical_not(numpy.isnan(y)) lx, ly = (x[mask], y[mask]) if numpy.all(numpy.isclose(lx-lx[0], numpy.zeros_like(lx))): return (0, numpy.mean(ly)), 0 p = numpy.polyfit(lx, ly, 1) # calculation of r-squared f = numpy.polyval(p, lx) fbar = numpy.sum(ly) / len(ly) ssreg = numpy.sum((f-fbar)**2) sstot = numpy.sum((ly - fbar)**2) rsq = ssreg / sstot return p, rsq
[docs] def peak_fit(xdata, ydata, iparams=None, peaktype='Gauss', maxit=300, background='constant', plot=False, func_out=False, debug=False): """ fit function using odr-pack wrapper in scipy similar to https://github.com/tiagopereira/python_tips/wiki/Scipy%3A-curve-fitting for Gauss, Lorentz or Pseudovoigt-functions Parameters ---------- xdata : array_like x-coordinates of the data to be fitted ydata : array_like y-coordinates of the data which should be fit iparams : list, optional initial paramters, determined automatically if not specified peaktype : {'Gauss', 'Lorentz', 'PseudoVoigt', 'PseudoVoigtAsym', 'PseudoVoigtAsym2'}, optional type of peak to fit maxit : int, optional maximal iteration number of the fit background : {'constant', 'linear'}, optional type of background function plot : bool or str, optional flag to ask for a plot to visually judge the fit. If plot is a string it will be used as figure name, which makes reusing the figures easier. func_out : bool, optional returns the fitted function, which takes the independent variables as only argument (f(x)) Returns ------- params : list the parameters as defined in function `Gauss1d/Lorentz1d/PseudoVoigt1d/ PseudoVoigt1dasym`. In the case of linear background one more parameter is included! sd_params : list For every parameter the corresponding errors are returned. itlim : bool flag to tell if the iteration limit was reached, should be False fitfunc : function, optional the function used in the fit can be returned (see func_out). """ if plot: plot, plt = utilities.import_matplotlib_pyplot('XU.math.peak_fit') gfunc, gfunc_dx, gfunc_dp = _getfit_func(peaktype, background) # determine initial parameters _check_iparams(iparams, peaktype, background) if iparams is None: iparams = _guess_iparams(xdata, ydata, peaktype, background) if config.VERBOSITY >= config.DEBUG: print(f"XU.math.peak_fit: iparams: {str(tuple(iparams))}") # set up odr fitting peak = odr.Model(gfunc, fjacd=gfunc_dx, fjacb=gfunc_dp) sy = numpy.sqrt(ydata) sy[sy == 0] = 1 mydata = odr.RealData(xdata, ydata, sy=sy) myodr = odr.ODR(mydata, peak, beta0=iparams, maxit=maxit) myodr.set_job(fit_type=2) # use least-square fit fit = myodr.run() if config.VERBOSITY >= config.DEBUG: print('XU.math.peak_fit:') fit.pprint() # prints final message from odrpack fparam = fit.beta etaidx = [] if peaktype in ('PseudoVoigt', 'PseudoVoigtAsym'): if background == 'linear': etaidx = [-2, ] else: etaidx = [-1, ] elif peaktype == 'PseudoVoigtAsym2': etaidx = [5, 6] for e in etaidx: fparam[e] = 0 if fparam[e] < 0 else fparam[e] fparam[e] = 1 if fparam[e] > 1 else fparam[e] itlim = False if fit.stopreason[0] == 'Iteration limit reached': itlim = True if config.VERBOSITY >= config.INFO_LOW: print("XU.math.peak_fit: Iteration limit reached, " "do not trust the result!") if plot: if isinstance(plot, str): plt.figure(plot) else: plt.figure('XU:peak_fit') plt.plot(xdata, ydata, 'ok', label='data', mew=2) if debug: plt.plot(xdata, gfunc(iparams, xdata), '-', color='0.5', label='estimate') plt.plot(xdata, gfunc(fparam, xdata), '-r', label=f'{peaktype}-fit') plt.legend() if func_out: return fparam, fit.sd_beta, itlim, lambda x: gfunc(fparam, x) return fparam, fit.sd_beta, itlim
def _getfit_func(peaktype, background): """ internal function to prepare the model functions and derivatives for the peak_fit function. Parameters ---------- peaktype : {'Gauss', 'Lorentz', 'PseudoVoigt', 'PseudoVoigtAsym', 'PseudoVoigtAsym2'} type of peak function background : {'constant', 'linear'} type of background function Returns ------- f, f_dx, f_dp : functions fit function, function of derivative regarding `x`, and functions of derivatives regarding the parameters """ if peaktype == 'Gauss': f = Gauss1d fdx = Gauss1d_der_x fdp = Gauss1d_der_p elif peaktype == 'Lorentz': f = Lorentz1d fdx = Lorentz1d_der_x fdp = Lorentz1d_der_p elif peaktype == 'PseudoVoigt': f = PseudoVoigt1d fdx = PseudoVoigt1d_der_x fdp = PseudoVoigt1d_der_p elif peaktype == 'PseudoVoigtAsym': f = PseudoVoigt1dasym elif peaktype == 'PseudoVoigtAsym2': f = PseudoVoigt1dasym2 else: raise InputError("keyword argument peaktype takes invalid value!") if background == 'linear': def gfunc(param, x): return f(x, *param) + x * param[-1] else: def gfunc(param, x): return f(x, *param) if peaktype in ('Gauss', 'Lorentz', 'PseudoVoigt'): if background == 'linear': def gfunc_dx(param, x): return fdx(x, *param) + param[-1] def gfunc_dp(param, x): return numpy.vstack((fdp(x, *param), x)) else: def gfunc_dx(param, x): return fdx(x, *param) def gfunc_dp(param, x): return fdp(x, *param) else: gfunc_dx = None gfunc_dp = None return gfunc, gfunc_dx, gfunc_dp def _check_iparams(iparams, peaktype, background): """ internal function to check if the length of the supplied initial parameters is correct given the other settings of the peak_fit function. An InputError is raised in case of wrong shape or value. Parameters ---------- iparams : list initial paramters for the fit peaktype : {'Gauss', 'Lorentz', 'PseudoVoigt', 'PseudoVoigtAsym', 'PseudoVoigtAsym2'} type of peak to fit background : {'constant', 'linear'} type of background """ if iparams is None: return ptypes = {('Gauss', 'constant'): 4, ('Lorentz', 'constant'): 4, ('Gauss', 'linear'): 5, ('Lorentz', 'linear'): 5, ('PseudoVoigt', 'constant'): 5, ('PseudoVoigt', 'linear'): 6, ('PseudoVoigtAsym', 'constant'): 6, ('PseudoVoigtAsym', 'linear'): 7, ('PseudoVoigtAsym2', 'constant'): 7, ('PseudoVoigtAsym2', 'linear'): 8} if not all(numpy.isreal(iparams)): raise InputError("XU.math.peak_fit: all initial parameters need to" "be real!") if (peaktype, background) in ptypes: nparams = ptypes[(peaktype, background)] if len(iparams) != nparams: raise InputError(f"XU.math.peak_fit: {nparams} initial parameters " f"are needed for {peaktype}-peak with " f"{background} background.") else: raise InputError(f"XU.math.peak_fit: invalid peak ({peaktype}) or " f"background ({background})") def _guess_iparams(xdata, ydata, peaktype, background): """ internal function to automatically esitmate peak parameters from the data, considering also the background type. Parameters ---------- xdata : array-like x-coordinates of the data to be fitted ydata : array-like y-coordinates of the data which should be fit peaktype : {'Gauss', 'Lorentz', 'PseudoVoigt', 'PseudoVoigtAsym', 'PseudoVoigtAsym2'} type of peak to fit background : {'constant', 'linear'} type of background, either Returns ------- list of initial parameters estimated from the data """ ld = numpy.empty(len(ydata)) # estimate peak position ipos, ld, back, slope = center_of_mass(xdata, ydata, background, full_output=True) maxpos = xdata[numpy.argmax(ld)] avx = numpy.average(xdata) if numpy.abs(ipos - avx) < numpy.abs(maxpos-avx): ipos = maxpos # use the estimate which is further from the center # estimate peak width sigma1 = numpy.sqrt(numpy.sum(numpy.abs((xdata - ipos) ** 2 * ld)) / numpy.abs(numpy.sum(ld))) sigma2 = fwhm_exp(xdata, ld)/(2 * numpy.sqrt(2 * numpy.log(2))) sigma = sigma1 if sigma1 < sigma2 else sigma2 # build initial parameters iparams = [ipos, sigma, numpy.max(ld), back] if peaktype in ['Lorentz', 'PseudoVoigt']: iparams[1] *= 2 * numpy.sqrt(2 * numpy.log(2)) if peaktype in ['PseudoVoigtAsym', 'PseudoVoigtAsym2']: iparams.insert(1, iparams[1]) if peaktype in ['PseudoVoigt', 'PseudoVoigtAsym']: # set ETA parameter to be between Gauss and Lorentz shape iparams.append(0.5) if peaktype == 'PseudoVoigtAsym2': iparams.append(0.5) iparams.append(0.5) if background == 'linear': iparams.append(slope) return iparams
[docs] def gauss_fit(xdata, ydata, iparams=None, maxit=300): """ Gauss fit function using odr-pack wrapper in scipy similar to https://github.com/tiagopereira/python_tips/wiki/Scipy%3A-curve-fitting Parameters ---------- xdata : array-like x-coordinates of the data to be fitted ydata : array-like y-coordinates of the data which should be fit iparams: list, optional initial paramters for the fit, determined automatically if not given maxit : int, optional maximal iteration number of the fit Returns ------- params : list the parameters as defined in function ``Gauss1d(x, *param)`` sd_params : list For every parameter the corresponding errors are returned. itlim : bool flag to tell if the iteration limit was reached, should be False """ return peak_fit(xdata, ydata, iparams=iparams, peaktype='Gauss', maxit=maxit)
[docs] def fit_peak2d(x, y, data, start, drange, fit_function, maxfev=2000): """ fit a two dimensional function to a two dimensional data set e.g. a reciprocal space map. Parameters ---------- x : array-like first data coordinate (does not need to be regularly spaced) y : array-like second data coordinate (does not need to be regularly spaced) data : array-like data set used for fitting (e.g. intensity at the data coordinates) start : list set of starting parameters for the fit used as first parameter of function fit_function drange : list limits for the data ranges used in the fitting algorithm, e.g. it is clever to use only a small region around the peak which should be fitted, i.e. [xmin, xmax, ymin, ymax] fit_function : callable function which should be fitted. Call signature must be :func:`fit_function(x, y, *params) -> ndarray` Returns ------- fitparam : list fitted parameters cov : array-like covariance matrix """ s = time.time() if config.VERBOSITY >= config.INFO_ALL: print("XU.math.fit: Fitting started... ", end='') start = numpy.array(start) lx = x.flatten() ly = y.flatten() mask = (lx > drange[0]) * (lx < drange[1]) * \ (ly > drange[2]) * (ly < drange[3]) ly = ly[mask] lx = lx[mask] ldata = data.flatten()[mask] def errfunc(p, x, z, data): return fit_function(x, z, *p) - data p, cov, _, errmsg, success = optimize.leastsq( errfunc, start, args=(lx, ly, ldata), full_output=1, maxfev=maxfev) s = time.time() - s if config.VERBOSITY >= config.INFO_ALL: print("finished in %8.2f sec, (data length used %d)" % (s, ldata.size)) print(f"XU.math.fit: {errmsg}") # calculate correct variance covariance matrix if cov is not None: s_sq = (errfunc(p, lx, ly, ldata) ** 2).sum() / \ (len(ldata) - len(start)) pcov = cov * s_sq else: pcov = numpy.zeros((len(start), len(start))) if success not in [1, 2, 3, 4]: print("XU.math.fit: Could not obtain fit!") return p, pcov
[docs] def multPeakFit(x, data, peakpos, peakwidth, dranges=None, peaktype='Gaussian', returnerror=False): """ function to fit multiple Gaussian/Lorentzian peaks with linear background to a set of data Parameters ---------- x : array-like x-coordinate of the data data : array-like data array with same length as `x` peakpos : list initial parameters for the peak positions peakwidth : list initial values for the peak width dranges : list of tuples list of tuples with (min, max) value of the data ranges to use. does not need to have the same number of entries as peakpos peaktype : {'Gaussian', 'Lorentzian'} type of peaks to be used returnerror : bool decides if the fit errors of pos, sigma, and amp are returned (default: False) Returns ------- pos : list peak positions derived by the fit sigma : list peak width derived by the fit amp : list amplitudes of the peaks derived by the fit background : array-like background values at positions `x` if returnerror == True: sd_pos : list standard error of peak positions as returned by scipy.odr.Output sd_sigma : list standard error of the peak width sd_amp : list standard error of the peak amplitude """ warnings.warn("deprecated function -> use the lmfit Python packge instead", DeprecationWarning) if peaktype == 'Gaussian': pfunc = Gauss1d pfunc_derx = Gauss1d_der_x elif peaktype == 'Lorentzian': pfunc = Lorentz1d pfunc_derx = Lorentz1d_der_x else: raise ValueError('wrong value for parameter peaktype was given') def deriv_x(p, x): """ function to calculate the derivative of the signal of multiple peaks and background w.r.t. the x-coordinate Parameters ---------- p : list parameters, for every peak there needs to be position, sigma, amplitude and at the end two values for the linear background function (b0, b1) x : array-like x-coordinate """ derx = numpy.zeros(x.size) # sum up peak functions contributions for i in range(len(p) // 3): ldx = pfunc_derx(x, p[3 * i], p[3 * i + 1], p[3 * i + 2], 0) derx += ldx # background contribution k = p[-2] b = numpy.ones(x.size) * k return derx + b def deriv_p(p, x): """ function to calculate the derivative of the signal of multiple peaks and background w.r.t. the parameters Parameters ---------- p : list parameters, for every peak there needs to be position, sigma, amplitude and at the end two values for the linear background function (b0, b1) x : array-like x-coordinate returns derivative w.r.t. all the parameters with shape (len(p),x.size) """ derp = numpy.empty(0) # peak functions contributions for i in range(len(p) // 3): lp = (p[3 * i], p[3 * i + 1], p[3 * i + 2], 0) if peaktype == 'Gaussian': derp = numpy.append(derp, -2 * (lp[0] - x) * pfunc(x, *lp)) derp = numpy.append( derp, (lp[0] - x) ** 2 / (2 * lp[1] ** 3) * pfunc(x, *lp)) derp = numpy.append(derp, pfunc(x, *lp) / lp[2]) else: # Lorentzian derp = numpy.append(derp, 4 * (x - lp[0]) * lp[2] / lp[1] / (1 + (2 * (x - lp[0]) / lp[1]) ** 2) ** 2) derp = numpy.append(derp, 4 * (lp[0] - x) * lp[2] / lp[1] ** 2 / (1 + (2 * (x - lp[0]) / lp[1]) ** 2) ** 2) derp = numpy.append(derp, 1 / (1 + (2 * (x - p[0]) / p[1]) ** 2)) # background contributions derp = numpy.append(derp, x) derp = numpy.append(derp, numpy.ones(x.size)) # reshape output derp.shape = (len(p),) + x.shape return derp def fsignal(p, x): """ function to calculate the signal of multiple peaks and background Parameters ---------- p : list list of parameters, for every peak there needs to be position, sigma, amplitude and at the end two values for the linear background function (k, d) x : array-like x-coordinate """ f = numpy.zeros(x.size) # sum up peak functions for i in range(len(p) // 3): lf = pfunc(x, p[3 * i], p[3 * i + 1], p[3 * i + 2], 0) f += lf # background k = p[-2] d = p[-1] b = numpy.polyval((k, d), x) return f + b ########################## # create local data set (extract data ranges) if dranges: mask = numpy.array([False] * x.size) for i in range(len(dranges)): lrange = dranges[i] lmask = numpy.logical_and(x > lrange[0], x < lrange[1]) mask = numpy.logical_or(mask, lmask) lx = x[mask] ldata = data[mask] else: lx = x ldata = data # create initial parameter list p = [] # background # exclude +/-2 peakwidth around the peaks bmask = numpy.ones_like(lx, dtype=bool) for pp, pw in zip(peakpos, peakwidth): bmask = numpy.logical_and(bmask, numpy.logical_or(lx < (pp-2*pw), lx > (pp+2*pw))) if numpy.any(bmask): k, d = numpy.polyfit(lx[bmask], ldata[bmask], 1) else: if config.VERBOSITY >= config.DEBUG: print("XU.math.multPeakFit: no data outside peak regions!") k, d = (0, ldata.min()) # peak parameters for i in range(len(peakpos)): amp = ldata[(lx - peakpos[i]) >= 0][0] - \ numpy.polyval((k, d), lx)[(lx - peakpos[i]) >= 0][0] p += [peakpos[i], peakwidth[i], amp] # background parameters p += [k, d] if config.VERBOSITY >= config.DEBUG: print("XU.math.multPeakFit: intial parameters") print(p) ########################## # fit with odrpack model = odr.Model(fsignal, fjacd=deriv_x, fjacb=deriv_p) odata = odr.RealData(lx, ldata) my_odr = odr.ODR(odata, model, beta0=p) # fit type 2 for least squares my_odr.set_job(fit_type=2) fit = my_odr.run() if config.VERBOSITY >= config.DEBUG: print("XU.math.multPeakFit: fitted parameters") print(fit.beta) try: if fit.stopreason[0] not in ['Sum of squares convergence']: print("XU.math.multPeakFit: fit NOT converged " f"({fit.stopreason[0]})") return None, None, None, None except IndexError: print("XU.math.multPeakFit: fit most probably NOT converged (%s)" % str(fit.stopreason)) return None, None, None, None # prepare return values fpos = fit.beta[:-2:3] fwidth = numpy.abs(fit.beta[1:-2:3]) famp = fit.beta[2::3] background = numpy.polyval((fit.beta[-2], fit.beta[-1]), x) if returnerror: sd_pos = fit.sd_beta[:-2:3] sd_width = fit.sd_beta[1:-2:3] sd_amp = fit.sd_beta[2::3] return fpos, fwidth, famp, background, sd_pos, sd_width, sd_amp return fpos, fwidth, famp, background
[docs] def multPeakPlot(x, fpos, fwidth, famp, background, dranges=None, peaktype='Gaussian', fig="xu_plot", ax=None, fact=1.): """ function to plot multiple Gaussian/Lorentz peaks with background values given by an array Parameters ---------- x : array-like x-coordinate of the data fpos : list positions of the peaks fwidth : list width of the peaks famp : list amplitudes of the peaks background : array-like background values, same shape as `x` dranges : list of tuples list of (min, max) values of the data ranges to use. does not need to have the same number of entries as fpos peaktype : {'Gaussian', 'Lorentzian'} type of peaks to be used fig : int, str, or None matplotlib figure number or name ax : matplotlib.Axes matplotlib axes as alternative to the figure name fact : float factor to use as multiplicator in the plot """ warnings.warn("deprecated function -> use the lmfit Python packge instead", DeprecationWarning) success, plt = utilities.import_matplotlib_pyplot('XU.math.multPeakPlot') if not success: return if fig: plt.figure(fig) if ax: plt.sca(ax) # plot single peaks if dranges: mask = numpy.array([False] * x.size) for i in range(len(dranges)): lrange = dranges[i] lmask = numpy.logical_and(x > lrange[0], x < lrange[1]) mask = numpy.logical_or(mask, lmask) lx = x[mask] lb = background[mask] else: lx = x lb = background f = numpy.zeros(lx.size) for i in range(len(fpos)): if peaktype == 'Gaussian': lf = Gauss1d(lx, fpos[i], fwidth[i], famp[i], 0) elif peaktype == 'Lorentzian': lf = Lorentz1d(lx, fpos[i], fwidth[i], famp[i], 0) else: raise ValueError('wrong value for parameter peaktype was given') f += lf plt.plot(lx, (lf + lb) * fact, ':k') # plot summed signal plt.plot(lx, (f + lb) * fact, '-r', lw=1.5)