Source code for qexpy.data.datasets

"""Defines data structures for collections of individual measurements"""

import re
import warnings

import numpy as np
import math as m

from typing import List  # pylint: disable=unused-import
from numbers import Real

from qexpy.utils import IllegalArgumentError

from . import data as dt
from . import utils as dut

import qexpy.utils as utils

ARRAY_TYPES = np.ndarray, list


[docs]class ExperimentalValueArray(np.ndarray):
    """An array of experimental values, alias: MeasurementArray

    An ExperimentalValueArray (MeasurementArray) represents a series of ExperimentalValue
    objects. It is implemented as a sub-class of numpy.ndarray. This class is given an alias
    "MeasurementArray" for more intuitive user interface.

    Args:
        *args: The first argument is an array of real numbers representing the center values
            of the measurements. The second argument (if present) is either a positive real
            number or an array of positive real numbers of the same length as the data array,
            representing the uncertainties on the measurements.

    Keyword Args:
        data (List): an array of real numbers representing the center values
        error (Real|List): the uncertainties on the measurements
        relative_error (Real|List): the relative uncertainties on the measurements
        unit (str): the unit of the measurement
        name (str): the name of the measurement

    Examples:
        >>> import qexpy as q

        >>> # We can instantiate an array of measurements with two lists
        >>> a = q.MeasurementArray([1, 2, 3, 4, 5], [0.1, 0.2, 0.3, 0.4, 0.5])
        >>> a
        ExperimentalValueArray([MeasuredValue(1.0 +/- 0.1),
                    MeasuredValue(2.0 +/- 0.2),
                    MeasuredValue(3.0 +/- 0.3),
                    MeasuredValue(4.0 +/- 0.4),
                    MeasuredValue(5.0 +/- 0.5)], dtype=object)

        >>> # We can also create an array of measurements with a single uncertainty.
        >>> # As usual, if the error is not specified, they will be set to 0 by default
        >>> a = q.MeasurementArray([1, 2, 3, 4, 5], 0.5, unit="m", name="length")
        >>> a
        ExperimentalValueArray([MeasuredValue(1.0 +/- 0.5),
                    MeasuredValue(2.0 +/- 0.5),
                    MeasuredValue(3.0 +/- 0.5),
                    MeasuredValue(4.0 +/- 0.5),
                    MeasuredValue(5.0 +/- 0.5)], dtype=object)

        >>> # We can access the different statistical properties of this array
        >>> print(np.sum(a))
        15 +/- 1 [m]
        >>> print(a.mean())
        3.0 +/- 0.7 [m]
        >>> a.std()
        1.5811388300841898

        >>> # Manipulation of a MeasurementArray is also very easy. We can append or insert
        >>> # into the array values in multiple formats
        >>> a = a.append((7, 0.2))  # a measurement can be inserted as a tuple
        >>> print(a[5])
        length = 7.0 +/- 0.2 [m]
        >>> a = a.insert(0, 8)  # if error is not specified, it is set to 0 by default
        >>> print(a[0])
        length = 8 +/- 0 [m]

        >>> # The same operations also works with array-like objects, in which case they are
        >>> # concatenated into a single array
        >>> a = a.append([(10, 0.1), (11, 0.3)])
        >>> a
        ExperimentalValueArray([MeasuredValue(8.0 +/- 0),
                    MeasuredValue(1.0 +/- 0.5),
                    MeasuredValue(2.0 +/- 0.5),
                    MeasuredValue(3.0 +/- 0.5),
                    MeasuredValue(4.0 +/- 0.5),
                    MeasuredValue(5.0 +/- 0.5),
                    MeasuredValue(7.0 +/- 0.2),
                    MeasuredValue(10.0 +/- 0.1),
                    MeasuredValue(11.0 +/- 0.3)], dtype=object)

        >>> # The ExperimentalValueArray object is vectorized just like numpy.ndarray. You
        >>> # can perform basic arithmetic operations as well as functions with them and get
        >>> # back ExperimentalValueArray objects
        >>> a = q.MeasurementArray([0, 1, 2], 0.5)
        >>> a + 2
        ExperimentalValueArray([DerivedValue(2.0 +/- 0.5),
                        DerivedValue(3.0 +/- 0.5),
                        DerivedValue(4.0 +/- 0.5)], dtype=object)
        >>> q.sin(a)
        ExperimentalValueArray([DerivedValue(0.0 +/- 0.5),
                        DerivedValue(0.8 +/- 0.3),
                        DerivedValue(0.9 +/- 0.2)], dtype=object)

    See Also:
        numpy.ndarray

    """

    # pylint: disable=no-member,arguments-differ,too-many-function-args

    def __new__(cls, *args, **kwargs):
        """Constructor for an ExperimentalValueArray

        __new__ is used instead of __init__ for object initialization. This is required for
        subclassing the numpy.ndarray type.

        """

        data = kwargs.pop("data", args[0] if args else None)

        if not isinstance(data, ARRAY_TYPES):
            raise TypeError("You have not provided valid data to initialize the array.")
        data = np.asarray(data)

        error = kwargs.pop("error", args[1] if len(args) > 1 else None)
        relative_error = kwargs.pop("relative_error", None)

        error_array = _get_error_array_helper(data, error, relative_error)

        if all(isinstance(x, dt.ExperimentalValue) for x in data):
            if error is None and relative_error is None:
                error_array = None
            return ExperimentalValueArray.__wrap(data, error_array=error_array, **kwargs)

        if not all(isinstance(x, Real) for x in data):
            raise TypeError("Some values in the array are not real numbers")

        values = list(
            dt.MeasuredValue(val, err, **kwargs) for val, err in zip(data, error_array))
        for index, meas in enumerate(values):
            meas.name = "{}_{}".format(meas.name, index) if meas.name else ""

        # Initialize the instance to a numpy.ndarray
        obj = np.asarray(values, dtype=dt.ExperimentalValue).view(ExperimentalValueArray)

        # Added so that subclasses of this are of the correct type
        obj.__class__ = cls

        return obj

    def __str__(self):
        value_errors = ", ".join(variable.print_value_error() for variable in self)
        name = "{} = ".format(self.name) if self.name else ""
        unit = " ({})".format(self.unit) if self.unit else ""
        return "{}[ {} ]{}".format(name, value_errors, unit)

    def __setitem__(self, key, value):
        if isinstance(value, Real):
            self[key].value = value
        else:
            super().__setitem__(
                key, dut.wrap_in_measurement(value, unit=self.unit, name=self.name))
            if self.name:
                self[key].name = "{}_{}".format(self.name, key)

    def __pow__(self, power):
        if isinstance(power, ARRAY_TYPES):
            return super().__pow__(power)
        return super().__pow__(dut.wrap_in_experimental_value(power))

    def __rpow__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__rpow__(other)
        return super().__rpow__(dut.wrap_in_experimental_value(other))

    def __add__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__add__(other)
        return super().__add__(dut.wrap_in_experimental_value(other))

    def __radd__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__radd__(other)
        return super().__radd__(dut.wrap_in_experimental_value(other))

    def __sub__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__sub__(other)
        return super().__sub__(dut.wrap_in_experimental_value(other))

    def __rsub__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__rsub__(other)
        return super().__rsub__(dut.wrap_in_experimental_value(other))

    def __mul__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__mul__(other)
        return super().__mul__(dut.wrap_in_experimental_value(other))

    def __rmul__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__rmul__(other)
        return super().__rmul__(dut.wrap_in_experimental_value(other))

    def __truediv__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__truediv__(other)
        return super().__truediv__(dut.wrap_in_experimental_value(other))

    def __rtruediv__(self, other):
        if isinstance(other, ARRAY_TYPES):
            return super().__rtruediv__(other)
        return super().__rtruediv__(dut.wrap_in_experimental_value(other))

    def __array_finalize__(self, obj):
        """wrap up array initialization"""
        if obj is None or not (self.shape and isinstance(self[0], dt.ExperimentalValue)):
            return  # Skip if this is not a regular array of ExperimentalValue objects
        if hasattr(obj, "name"):
            name = getattr(obj, "name", "")
        else:
            name = getattr(self, "name", "")
        # re-index the names of the measurements
        for index, measurement in enumerate(self):
            measurement.name = "{}_{}".format(name, index) if name else ""

    @property
    def name(self):
        """str: Name of this array of values

        A name can be given to this data set, and each measurement within this list will be
        named in the form of "name_index". For example, if the name is specified as "length",
        the items in this array will be named "length_0", "length_1", "length_2", ...

        """
        return re.sub(r"_[0-9]+$", "", self[0].name)

    @name.setter
    def name(self, new_name: str):
        if not isinstance(new_name, str):
            raise TypeError("Cannot set name to \"{}\"!".format(type(new_name).__name__))
        for index, measurement in enumerate(self):
            measurement.name = "{}_{}".format(new_name, index)

    @property
    def unit(self):
        """str: The unit of this array of values

        It is assumed that the set of data that constitutes one ExperimentalValueArray have
        the same unit, which, when assigned, is given too all the items of the array.

        """
        return self[0].unit

    @unit.setter
    def unit(self, unit_string: str):
        if not isinstance(unit_string, str):
            raise TypeError("Cannot set unit to \"{}\"!".format(type(unit_string).__name__))
        new_unit = utils.parse_unit_string(unit_string) if unit_string else {}
        for data in self:
            data._unit = new_unit

    @property
    def values(self):
        """np.ndarray: An array consisting of the center values of each item"""
        return np.asarray(list(data.value for data in self))

    @property
    def errors(self):
        """np.ndarray: An array consisting of the uncertainties of each item"""
        return np.asarray(list(data.error for data in self))

[docs]    def append(self, value) -> "ExperimentalValueArray":
        """Adds a value to the end of this array and returns the new array

        Args:
            value: The value to be appended to this array. This can be a real number, a pair
                of value and error in a tuple, an ExperimentalValue instance, or an array
                consisting of any of the above.

        Returns:
            The new ExperimentalValueArray instance

        """
        value = dut.wrap_in_value_array(value, unit=self.unit, name=self.name)
        result = np.append(self, value).view(ExperimentalValueArray)
        for index, measurement in enumerate(result):
            measurement.name = "{}_{}".format(self.name, index)
            measurement.unit = self.unit
        return result

[docs]    def insert(self, index: int, value) -> "ExperimentalValueArray":
        """adds a value to a position in this array and returns the new array

        Args:
            index (int): the position to insert the value
            value: The value to be inserted into this array. This can be a real number, a
                pair of value and error in a tuple, an ExperimentalValue instance, or an
                array consisting of any of the above.

        Returns:
            The new ExperimentalValueArray instance

        """
        value = dut.wrap_in_value_array(value, unit=self.unit, name=self.name)
        result = np.insert(self, index, value).view(ExperimentalValueArray)
        for idx, measurement in enumerate(result):
            measurement.name = "{}_{}".format(self.name, idx)
            measurement.unit = self.unit
        return result

[docs]    def delete(self, index: int) -> "ExperimentalValueArray":
        """deletes the value on the requested position and returns the new array

        Args:
            index (int): the index of the value to be deleted

        Returns:
            The new ExperimentalValueArray instance

        """
        result = np.delete(self, index).view(ExperimentalValueArray)
        for idx, measurement in enumerate(result):
            measurement.name = "{}_{}".format(self.name, idx)
        return result

[docs]    def mean(self, **_) -> "dt.ExperimentalValue":  # pylint:disable=arguments-differ
        """The mean of the array"""
        result = np.mean(self.values)
        error = self.error_on_mean()
        name = "mean of {}".format(self.name) if self.name else ""
        return dt.MeasuredValue(float(result), error, unit=self.unit, name=name)

[docs]    def std(self, ddof=1, **_) -> float:  # pylint:disable=arguments-differ
        """The standard deviation of this array"""
        return float(np.std(self.values, ddof=ddof))

[docs]    def sum(self, **_) -> "dt.ExperimentalValue":  # pylint:disable=arguments-differ
        """The sum of the array"""
        result = np.sum(self.values)
        error = np.sqrt(np.sum(self.errors ** 2))
        return dt.MeasuredValue(float(result), float(error), unit=self.unit, name=self.name)

[docs]    def error_on_mean(self) -> float:
        """The error on the mean of this array"""
        return self.std() / m.sqrt(self.size)

[docs]    def error_weighted_mean(self) -> float:
        """The error weighted mean of this array"""
        if any(err == 0 for err in self.errors):
            warnings.warn(
                "One or more errors are 0, the error weighted mean cannot be calculated.")
            return np.nan
        weights = np.asarray(list(1 / (err ** 2) for err in self.errors))
        return float(np.sum(weights * self.values) / np.sum(weights))

[docs]    def propagated_error(self) -> float:
        """The propagated error from the error weighted mean calculation"""
        if any(err == 0 for err in self.errors):
            warnings.warn(
                "One or more errors are 0, the propagated error cannot be calculated.")
            return np.nan
        weights = np.asarray(list(1 / (err ** 2) for err in self.errors))
        return 1 / np.sqrt(np.sum(weights))

    @classmethod
    def __wrap(cls, data, **kwargs):
        """if an array of ExperimentalValue objects are passed in, simply wrap it"""

        error_array = kwargs.get("error_array", None)

        if error_array is not None:
            for x, err in zip(data, error_array):
                x.error = err  # update the errors if specified

        name = kwargs.get("name", None)
        unit = kwargs.get("unit", None)

        if name is not None:
            for idx, x in enumerate(data):
                x.name = "{}_{}".format(name, idx)
        if unit is not None:
            for x in data:
                x.unit = unit

        obj = data.view(ExperimentalValueArray)
        obj.__class__ = cls
        return obj


[docs]class XYDataSet:
    """A pair of ExperimentalValueArray objects

    QExPy is capable of multiple ways of data handling. One typical case in experimental data
    analysis is for a pair of data sets, which is usually plotted or fitted with a curve.

    Args:
        xdata (List|np.ndarray): an array of values for x-data
        ydata (List|np.ndarray): an array of values for y-data

    Keyword Args:
        xerr (Real|List): the uncertainty on x data
        yerr (Real|List): the uncertainty on y data
        xunit (str): the unit of the x data set
        yunit (str): the unit of the y data set
        xname (str): the name of the x data set
        yname (str): the name of the y data set

    Examples:

        >>> import qexpy as q

        >>> a = q.XYDataSet(xdata=[0, 1, 2, 3, 4], xerr=0.5, xunit="m", xname="length",
        >>>                 ydata=[3, 4, 5, 6, 7], yerr=[0.1,0.2,0.3,0.4,0.5],
        >>>                 yunit="kg", yname="weight")
        >>> a.xvalues
        array([0, 1, 2, 3, 4])
        >>> a.xerr
        array([0.5, 0.5, 0.5, 0.5, 0.5])
        >>> a.yerr
        array([0.1, 0.2, 0.3, 0.4, 0.5])
        >>> a.xdata
        ExperimentalValueArray([MeasuredValue(0.0 +/- 0.5),
                        MeasuredValue(1.0 +/- 0.5),
                        MeasuredValue(2.0 +/- 0.5),
                        MeasuredValue(3.0 +/- 0.5),
                        MeasuredValue(4.0 +/- 0.5)], dtype=object)

    """

    def __init__(self, *args, **kwargs):

        xunit = kwargs.get("xunit", "")
        yunit = kwargs.get("yunit", "")
        xname = kwargs.get("xname", "")
        yname = kwargs.get("yname", "")

        self._name = kwargs.get("name", "")

        xerr = kwargs.get("xerr", None)
        yerr = kwargs.get("yerr", None)

        xdata = kwargs.pop("xdata", args[0] if len(args) >= 2 else None)
        ydata = kwargs.pop("ydata", args[1] if len(args) >= 2 else None)

        xdata = XYDataSet.__wrap_data(xdata, xerr, name=xname, unit=xunit)
        ydata = XYDataSet.__wrap_data(ydata, yerr, name=yname, unit=yunit)

        if len(xdata) != len(ydata):
            raise ValueError("The length of xdata and ydata don't match!")

        self.xdata = xdata  # type: ExperimentalValueArray
        self.ydata = ydata  # type: ExperimentalValueArray

    @property
    def name(self):
        """str: The name of this data set"""
        return self._name if self._name else "XY Dataset"

    @name.setter
    def name(self, new_name: str):
        if not isinstance(new_name, str):
            raise TypeError("Cannot set name to \"{}\"".format(type(new_name).__name__))
        self._name = new_name

    @property
    def xvalues(self):
        """np.ndarray: The values of the x data set"""
        return self.xdata.values

    @property
    def xerr(self):
        """np.ndarray: The errors of the x data set"""
        return self.xdata.errors

    @property
    def yvalues(self):
        """np.ndarray: The values of the y data set"""
        return self.ydata.values

    @property
    def yerr(self):
        """np.ndarray: The errors of the x data set"""
        return self.ydata.errors

    @property
    def xname(self):
        """str: Name of the xdata set"""
        return self.xdata.name

    @xname.setter
    def xname(self, name):
        if not isinstance(name, str):
            raise TypeError("Cannot set xname to \"{}\"".format(type(name).__name__))
        self.xdata.name = name

    @property
    def xunit(self):
        """str: Unit of the xdata set"""
        return self.xdata.unit

    @xunit.setter
    def xunit(self, unit):
        if not isinstance(unit, str):
            raise TypeError("Cannot set xunit to \"{}\"".format(type(unit).__name__))
        self.xdata.unit = unit

    @property
    def yname(self):
        """str: Name of the ydata set"""
        return self.ydata.name

    @yname.setter
    def yname(self, name):
        if not isinstance(name, str):
            raise TypeError("Cannot set yname to \"{}\"".format(type(name).__name__))
        self.ydata.name = name

    @property
    def yunit(self):
        """str: Unit of the ydata set"""
        return self.ydata.unit

    @yunit.setter
    def yunit(self, unit):
        if not isinstance(unit, str):
            raise TypeError("Cannot set yunit to \"{}\"".format(type(unit).__name__))
        self.ydata.unit = unit

[docs]    def fit(self, model, **kwargs):
        """Fits the current dataset to a model

        See Also:
            The fit function in the fitting module of QExPy

        """
        import qexpy.fitting as fitting  # pylint: disable=cyclic-import
        return fitting.fit(self, model, **kwargs)

    @staticmethod
    def __wrap_data(data, error, unit, name) -> ExperimentalValueArray:
        """Wraps the data set into ExperimentalValueArray objects"""

        if isinstance(data, ExperimentalValueArray):
            if name:
                data.name = name
            if unit:
                data.unit = unit
            if error is not None:
                error_array = _get_error_array_helper(data, error, None)
                for x, e in zip(data, error_array):
                    x.error = e
            return data
        if isinstance(data, ARRAY_TYPES):
            return ExperimentalValueArray(data, error, unit=unit, name=name)

        raise IllegalArgumentError("Cannot create XYDataSet with the given arguments.")


def _get_error_array_helper(data, error, rel_error):
    """Helper method that produces an error array for an ExperimentalValueArray"""

    if error is None and rel_error is None:
        error_array = [0.0] * len(data)
    elif isinstance(error, Real):
        error_array = [float(error)] * len(data)
    elif isinstance(error, ARRAY_TYPES) and all(isinstance(err, Real) for err in error):
        if len(error) != len(data):
            raise ValueError("The length of the error data arrays don't match.")
        error_array = np.asarray(error)
    elif isinstance(rel_error, Real):
        error_array = float(rel_error) * abs(data)
    elif isinstance(rel_error, ARRAY_TYPES) and all(isinstance(e, Real) for e in rel_error):
        if len(rel_error) != len(data):
            raise ValueError("The length of the relative error and data arrays don't match.")
        error_array = rel_error * abs(data)
    else:
        raise TypeError("The error or relative error provided is invalid!")

    if any(err < 0 for err in error_array):
        raise ValueError("The uncertainty of any measurement cannot be negative!")

    return error_array