diff --git a/qiskit_experiments/curve_analysis/__init__.py b/qiskit_experiments/curve_analysis/__init__.py index efcf552de8..523fbebb69 100644 --- a/qiskit_experiments/curve_analysis/__init__.py +++ b/qiskit_experiments/curve_analysis/__init__.py @@ -10,36 +10,482 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. -""" +r""" ========================================================= Curve Analysis (:mod:`qiskit_experiments.curve_analysis`) ========================================================= .. currentmodule:: qiskit_experiments.curve_analysis -Classes -======= +Curve analysis provides the analysis base class for a variety of experiments with +a single experimental parameter sweep. This analysis subclasses can override +several class attributes to customize the behavior from data processing to post-processing, +including providing systematic initial guess for parameters tailored to the experiment. +Here we describe how code developers can create new analysis inheriting from the base class. + + +.. _curve_analysis_overview: + +Curve Analysis Overview +======================= + +The base class :class:`CurveAnalysis` implements the multi-objective optimization on +different sets of experiment results. A single experiment can define sub-experiments +consisting of multiple circuits which are tagged with common metadata, +and curve analysis sorts the experiment results based on the circuit metadata. + +This is an example of showing the abstract data structure of typical curve analysis experiment: + +.. code-block:: none + :emphasize-lines: 1,10,19 + + "experiment" + - circuits[0] (x=x1_A, "series_A") + - circuits[1] (x=x1_B, "series_B") + - circuits[2] (x=x2_A, "series_A") + - circuits[3] (x=x2_B, "series_B") + - circuits[4] (x=x3_A, "series_A") + - circuits[5] (x=x3_B, "series_B") + - ... 
+ + "experiment data" + - data[0] (y1_A, "series_A") + - data[1] (y1_B, "series_B") + - data[2] (y2_A, "series_A") + - data[3] (y2_B, "series_B") + - data[4] (y3_A, "series_A") + - data[5] (y3_B, "series_B") + - ... + + "analysis" + - "series_A": y_A = f_A(x_A; p0, p1, p2) + - "series_B": y_B = f_B(x_B; p0, p1, p2) + - fixed parameters {p1: v} + +Here the experiment runs two subset of experiments, namely, series A and series B. +The analysis defines corresponding fit models :math:`f_A(x_A)` and :math:`f_B(x_B)`. +Data extraction function in the analysis creates two datasets, :math:`(x_A, y_A)` +for the series A and :math:`(x_B, y_B)` for the series B, from the experiment data. +Optionally, the curve analysis can fix certain parameters during the fitting. +In this example, :math:`p_1 = v` remains unchanged during the fitting. + +The curve analysis aims at solving the following optimization problem: + +.. math:: + + \Theta_{\mbox{opt}} = \arg\min_{\Theta_{\rm fit}} \sigma^{-2} (F(X, \Theta)-Y)^2, + +where :math:`F` is the composite objective function defined on the full experiment data +:math:`(X, Y)`, where :math:`X = x_A \oplus x_B` and :math:`Y = y_A \oplus y_B`. +This objective function can be described by two fit functions as follows. + +.. math:: + + F(X, \Theta) = f_A(x_A, \theta_A) \oplus f_B(x_B, \theta_B). + +The solver conducts the least square curve fitting against this objective function +and returns the estimated parameters :math:`\Theta_{\mbox{opt}}` +that minimizes the reduced chi-squared value. +The parameters to be evaluated are :math:`\Theta = \Theta_{\rm fit} \cup \Theta_{\rm fix}`, +where :math:`\Theta_{\rm fit} = \theta_A \cup \theta_B`. +Since series A and B share the parameters in this example, :math:`\Theta_{\rm fit} = \{p_0, p_2\}`, +and the fixed parameters are :math:`\Theta_{\rm fix} = \{ p_1 \}` as mentioned. +Thus, :math:`\Theta = \{ p_0, p_1, p_2 \}`. 
+ +Experiment for each series can perform individual parameter sweep for :math:`x_A` and :math:`x_B`, +and experiment data yield outcomes :math:`y_A` and :math:`y_B`, which might be different size. +Data processing function may also compute :math:`\sigma_A` and :math:`\sigma_B` which are +the uncertainty of outcomes arising from the sampling error or measurement error. + +More specifically, the curve analysis defines following data model. + +- Series: Definition of the single curve. Every series may define unique + filter keyword arguments for data sorting, a fit function with parameters, + and preferred style for fit outcome visualization. + +- Group: List of series. Fit functions defined under the group must share the + fit parameters. Fit functions in the group are simultaneously fit to + generate a single fit result. + +To manage this structure, curve analysis provides a special dataclass :class:`SeriesDef` +that represents a model configuration for a single curve data. +Based on this information, the curve analysis automatically builds the optimization routine. +Finally, the analysis outputs a set of :class:`AnalysisResultData` entries +for important fit outcomes along with a single Matplotlib figure of the fit curves +with the measured data points. + +With this baseclass a developer can avoid writing boilerplate code in +various curve analyses subclass and one can quickly write up +the analysis code for a particular experiment. + + +.. _curve_analysis_define_series: + +Defining New Series +=================== + +You can intuitively write the definition of a new series, as shown below: + +.. 
code-block:: python3 + + from qiskit_experiments.curve_analysis import SeriesDef, fit_function + + SeriesDef( + fit_func=lambda x, p0, p1, p2: fit_function.exponential_decay( + x, amp=p0, lamb=p1, baseline=p2 + ), + model_description="p0 * exp(-p1 * x) + p2", + ) + +The minimum field you must fill with is the ``fit_func``, which is a callback function used +with the optimization solver. Here you must call one of the fit functions from the module +:mod:`qiskit_experiments.curve_analysis.fit_function` because they implement +special logic to compute error propagation. +Note that argument name of the fit function is important because +the signature of the provided fit function is inspected behind the scenes and +used as a parameter name of the analysis result instance. +This name may be used to populate your experiment database with the result. + +Optionally you can set ``model_description`` which is a string representation of your +fitting model that will be passed to the analysis result as a part of metadata. +This instance should be set to :attr:`CurveAnalysis.__series__` as a python list. + +Here is another example how to implement multi-objective optimization task: + +.. code-block:: python3 + + [ + SeriesDef( + name="my_experiment1", + fit_func=lambda x, p0, p1, p2, p3: fit_function.exponential_decay( + x, amp=p0, lamb=p1, baseline=p3 + ), + filter_kwargs={"tag": 1}, + plot_color="red", + plot_symbol="^", + ), + SeriesDef( + name="my_experiment2", + fit_func=lambda x, p0, p1, p2, p3: fit_function.exponential_decay( + x, amp=p0, lamb=p2, baseline=p3 + ), + filter_kwargs={"tag": 2}, + plot_color="blue", + plot_symbol="o", + ), + ] + +Note that now you also need to provide ``name`` and ``filter_kwargs`` to +distinguish the entries and filter the corresponding dataset from the experiment data. +Optionally, you can provide ``plot_color`` and ``plot_symbol`` to visually +separate two curves in the plot. 
In this model, you have 4 parameters ``[p0, p1, p2, p3]`` +and the two curves share ``p0`` (``p3``) for ``amp`` (``baseline``) of +the :func:`exponential_decay` fit function. +Here one should expect the experiment data will have two classes of data with metadata +``"tag": 1`` and ``"tag": 2`` for ``my_experiment1`` and ``my_experiment2``, respectively. + +By using this model, one can flexibly set up your fit model. Here is another example: + +.. code-block:: python3 + + [ + SeriesDef( + name="my_experiment1", + fit_func=lambda x, p0, p1, p2, p3: fit_function.cos( + x, amp=p0, freq=p1, phase=p2, baseline=p3 + ), + filter_kwargs={"tag": 1}, + plot_color="red", + plot_symbol="^", + ), + SeriesDef( + name="my_experiment2", + fit_func=lambda x, p0, p1, p2, p3: fit_function.sin( + x, amp=p0, freq=p1, phase=p2, baseline=p3 + ), + filter_kwargs={"tag": 2}, + plot_color="blue", + plot_symbol="o", + ), + ] + +You have the same set of fit parameters for two curves, but now you fit two datasets +with different trigonometric functions. + + +.. _curve_analysis_fixed_param: + +Fitting with Fixed Parameters +============================= + +You can also remain certain parameters unchanged during the fitting by specifying +the parameter names in the analysis option ``fixed_parameters``. +This feature is useful especially when you want to define a subclass of +a particular analysis class. + +.. code-block:: python3 + + class AnalysisA(CurveAnalysis): + + __series__ = [ + SeriesDef( + fit_func=lambda x, p0, p1, p2: fit_function.exponential_decay( + x, amp=p0, lamb=p1, baseline=p2 + ), + ), + ] + + class AnalysisB(AnalysisA): + + @classmethod + def _default_options(cls) -> Options: + options = super()._default_options() + options.fixed_parameters = {"p0": 3.0} + + return options + +The parameter specified in ``fixed_parameters`` is exluded from the fitting. +This code will give you identical fit model to the one defined in the following class: + +.. 
code-block:: python3 + + class AnalysisB(CurveAnalysis): + + __series__ = [ + SeriesDef( + fit_func=lambda x, p1, p2: fit_function.exponential_decay( + x, amp=3.0, lamb=p1, baseline=p2 + ), + ), + ] + +However, note that you can also inherit other features, e.g. the algorithm to +generate initial guesses for parameters, from the :class:`AnalysisA` in the first example. +On the other hand, in the latter case, you need to manually copy and paste +every logic defined in the :class:`AnalysisA`. + +.. _curve_analysis_workflow: + +Cureve Analysis Workflow +======================== + +Typically curve analysis performs fitting as follows. +This workflow is defined in the method :meth:`CurveAnalysis._run_analysis`. + +1. Initialization + +Curve analysis calls :meth:`_initialization` method where it initializes +some internal states and optionally populate analysis options +with the input experiment data. +In some case it may train the data processor with fresh outcomes. +A developer can override this method to perform initialization of analysis-specific variables. + +2. Data processing + +Curve analysis calls :meth:`_run_data_processing` method where +the data processor in the analysis option is internally called. +This consumes input experiment results and creates :class:`CurveData` dataclass. +Then :meth:`_format_data` method is called with the processed dataset to format it. +By default, the formatter takes average of the outcomes in the processed dataset +over the same x values, followed by the sorting in the ascending order of x values. +This allows the analysis to easily estimate the slope of the curves to +create algorithmic initial guess of fit parameters. +A developer can inject extra data processing, for example, filtering, smoothing, +or elimination of outliers for better fitting. + +3. Fitting + +Curve analysis calls :meth:`_run_curve_fit` method which is the core functionality of the fitting. 
Another method, :meth:`_generate_fit_guesses`, is internally called to
If the analysis contains multiple series definitions, +we can get the subset of curve data with :meth:`CurveData.get_subset_of` with +the name of the series. +A developer can implement the algorithm to generate initial guesses and boundaries +by using this curve data object, which will be provided to the fitter. +Note that there are several common initial guess estimators available in +:mod:`qiskit_experiments.curve_analysis.guess`. + +The :meth:`_generate_fit_guesses` also receives :class:`FitOptions` instance ``user_opt``, +which contains user provided guesses and boundaries. +This is dictionary-like object consisting of sub-dictionaries for +initial guess ``.p0``, boundary ``.bounds``, and extra options for the fitter. +Note that :class:`CurveAnalysis` uses SciPy `curve_fit`_ as the least square solver. +See the API documentation for available options. + +The :class:`FitOptions` class implements convenient method :meth:`set_if_empty` to manage +conflict with user provided values, i.e. user provided values have higher priority, +thus systematically generated values cannot override user values. + +.. code-block:: python3 + + def _generate_fit_guesses(self, user_opt, curve_data): + + opt1 = user_opt.copy() + opt1.p0.set_if_empty(p1=3) + opt1.bounds = set_if_empty(p1=(0, 10)) + opt1.add_extra_options(method="lm") + + opt2 = user_opt.copy() + opt2.p0.set_if_empty(p1=4) + + return [opt1, opt2] + +Here you created two options with different ``p1`` values. +If multiple options are returned like this, the :meth:`_run_curve_fit` method +attempts to fit with all provided options and finds the best outcome with +the minimum reduced chi-square value. +When the fit model contains some parameter that cannot be easily estimated from the +curve data, you can create multiple options with varying the initial guess to +let the fitter find the most reasonable parameters to explain the model. +This allows you to avoid analysis failure with the poor initial guesses. + +.. 
_curve_fit: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html + + +.. _curve_analysis_quality: + +Evaluate Fit Quality +==================== + +A subclass can override :meth:`_evaluate_quality` method to +provide an algorithm to evaluate quality of the fitting. +This method is called with the :class:`FitData` object which contains +fit parameters and the reduced chi-squared value. +Qiskit Experiments often uses the empirical criterion chi-squared < 3 as a good fitting. + + +.. _curve_analysis_results: + +Curve Analysis Results +====================== + +Once the best fit parameters are found, the :meth:`_create_analysis_results` method is +called with the same :class:`FitData` object. +By default :class:`CurveAnalysis` only creates a single entry ``@Parameters_``. +This entry consists of fit parameter values with statistical information of the fitting. + +If you want to create an analysis result entry for the particular parameter, +you can override the analysis options ``result_parameters``. +By using :class:`ParameterRepr` representation, you can rename the parameter in the entry. + +.. code-block:: python3 + + from qiskit_experiments.curve_analysis import ParameterRepr + + def _default_options(cls) -> Options: + options = super()._default_options() + options.result_parameters = [ParameterRepr("p0", "amp", "Hz")] + + return options + +Here the first argument ``p0`` is the target parameter defined in the series definition, +``amp`` is the representation of ``p0`` in the result entry, +and ``Hz`` is the optional string for the unit of the value if available. + +Not only returning the fit parameters, you can also compute new quantities +by combining multiple fit parameters. +This can be done by overriding the :meth:`_create_analysis_results` method. + +.. 
code-block:: python3 + + from qiskit_experiments.framework import AnalysisResultData + + def _create_analysis_results(self, fit_data, quality, **metadata): + + outcomes = super()._create_analysis_results(fit_data, **metadata) + + p0 = fit_data.fitval("p0") + p1 = fit_data.fitval("p1") + + extra_entry = AnalysisResultData( + name="p01", + value=p0 * p1, + quality=quality, + extra=metadata, + ) + outcomes.append(extra_entry) + + return outcomes + +Note that both ``p0`` and ``p1`` are `ufloat`_ object consisting of +a nominal value and an error value which assumes the standard deviation. +Since this object natively supports error propagation, +you don't need to manually recompute the error of new value. + +.. _ufloat: https://pythonhosted.org/uncertainties/user_guide.html + + +If there is any missing feature, you can write a feature request as an issue in our +`GitHub `_. + + +Base Classes +============ .. autosummary:: :toctree: ../stubs/ + BaseCurveAnalysis CurveAnalysis + +Data Classes +============ + +.. autosummary:: + :toctree: ../stubs/ + SeriesDef CurveData FitData ParameterRepr FitOptions - MplCurveDrawer -Standard Analysis -================= +Visualization +============= + +.. autosummary:: + :toctree: ../stubs/ -These classes provide typical analysis functionality. -These are expected to be reused in multiple experiments. -By overriding default options from the class method :meth:`_default_analysis_options` of -your experiment class, you can still tailor the standard analysis classes to your experiment. + BaseCurveDrawer + MplCurveDrawer + +Standard Analysis Library +========================= .. autosummary:: :toctree: ../stubs/ @@ -52,20 +498,6 @@ GaussianAnalysis ErrorAmplificationAnalysis -Functions -========= - -These are the helper functions to realize a part of curve fitting functionality. - -Curve Fitting -************* - -.. autosummary:: - :toctree: ../stubs/ - - curve_fit - multi_curve_fit - Fit Functions ************* .. 
autosummary:: @@ -82,8 +514,8 @@ fit_function.bloch_oscillation_y fit_function.bloch_oscillation_z -Initial Guess -************* +Initial Guess Estimators +************************ .. autosummary:: :toctree: ../stubs/ @@ -97,15 +529,6 @@ guess.min_height guess.oscillation_exp_decay -Visualization -************* -.. autosummary:: - :toctree: ../stubs/ - - plot_curve_fit - plot_errorbar - plot_scatter - Utilities ********* .. autosummary:: @@ -113,6 +536,7 @@ is_error_not_significant """ +from .base_curve_analysis import BaseCurveAnalysis from .curve_analysis import CurveAnalysis, is_error_not_significant from .curve_data import CurveData, SeriesDef, FitData, ParameterRepr, FitOptions from .curve_fit import ( @@ -121,7 +545,7 @@ process_curve_data, process_multi_curve_data, ) -from .visualization import MplCurveDrawer +from .visualization import BaseCurveDrawer, MplCurveDrawer from . import guess from . import fit_function diff --git a/qiskit_experiments/curve_analysis/base_curve_analysis.py b/qiskit_experiments/curve_analysis/base_curve_analysis.py new file mode 100644 index 0000000000..387c538648 --- /dev/null +++ b/qiskit_experiments/curve_analysis/base_curve_analysis.py @@ -0,0 +1,547 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2022. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +""" +Base class of curve analysis. 
+""" + +import warnings + +from abc import ABC, abstractmethod +from typing import List, Dict, Union + +import numpy as np +from uncertainties import unumpy as unp + +from qiskit_experiments.framework import BaseAnalysis, AnalysisResultData, Options, ExperimentData +from qiskit_experiments.data_processing import DataProcessor +from qiskit_experiments.data_processing.processor_library import get_processor +from qiskit_experiments.data_processing.exceptions import DataProcessorError +from qiskit_experiments.exceptions import AnalysisError + +from .curve_data import CurveData, SeriesDef, FitData, ParameterRepr, FitOptions +from .data_processing import multi_mean_xy_data, data_sort +from .curve_fit import multi_curve_fit +from .visualization import MplCurveDrawer, BaseCurveDrawer + +PARAMS_ENTRY_PREFIX = "@Parameters_" +DATA_ENTRY_PREFIX = "@Data_" + + +class BaseCurveAnalysis(BaseAnalysis, ABC): + """Abstract superclass of curve analysis base classes. + + Note that this class doesn't define :meth:`_run_analysis` method, + and no actual fitting protocol is implemented in this base class. + However, this class defines several common methods that can be reused. + A curve analysis subclass can construct proper fitting protocol + by combining following methods, i.e. subroutines. + See :ref:`curve_analysis_workflow` for how these subroutines are called. + + .. rubric:: _generate_fit_guesses + + This method creates initial guesses for the fit parameters. + This might be overridden by subclass. + See :ref:`curve_analysis_init_guess` for details. + + .. rubric:: _format_data + + This method consumes the processed dataset and outputs the formatted dataset. + By default, this method takes the average of y values over + the same x values and then sort the entire data by x values. + + .. rubric:: _evaluate_quality + + This method evaluates the quality of the fit based on the fit result. + This returns "good" when reduced chi-squared is less than 3.0. 
+ Usually it returns string "good" or "bad" according to the evaluation. + This criterion can be updated by subclass. + + .. rubric:: _run_data_processing + + This method performs data processing and returns the processed dataset. + By default, it internally calls :class:`DataProcessor` instance from the analysis options + and processes experiment data payload to create Y data with uncertainty. + X data and other metadata are generated within this method by inspecting the + circuit metadata. The series classification is also performed by based upon the + matching of circuit metadata and :attr:`SeriesDef.filter_kwargs`. + + .. rubric:: _run_curve_fit + + This method performs the fitting with predefined fit models and the formatted dataset. + This method internally calls :meth:`_generate_fit_guesses` method. + Note that this is a core functionality of the :meth:`_run_analysis` method, + that creates fit result object from the formatted dataset. + + .. rubric:: _create_analysis_results + + This method to creates analysis results for important fit parameters + that might be defined by analysis options ``result_parameters``. + In addition, another entry for all fit parameters is created when + the analysis option ``return_fit_parameters`` is ``True``. + + .. rubric:: _create_curve_data + + This method to creates analysis results for the formatted dataset, i.e. data used for the fitting. + Entries are created when the analysis option ``return_data_points`` is ``True``. + If analysis consists of multiple series, analysis result is created for + each curve data in the series definitions. + + .. rubric:: _initialize + + This method initializes analysis options against input experiment data. + Usually this method is called before other methods are called. 
+ + """ + + @property + @abstractmethod + def parameters(self) -> List[str]: + """Return parameters estimated by this analysis.""" + + @property + def drawer(self) -> BaseCurveDrawer: + """A short-cut for curve drawer instance.""" + return self._options.curve_drawer + + @classmethod + def _default_options(cls) -> Options: + """Return default analysis options. + + Analysis Options: + curve_drawer (BaseCurveDrawer): A curve drawer instance to visualize + the analysis result. + plot_raw_data (bool): Set ``True`` to draw processed data points, + dataset without formatting, on canvas. This is ``False`` by default. + plot (bool): Set ``True`` to create figure for fit result. + This is ``True`` by default. + return_fit_parameters (bool): Set ``True`` to return all fit model parameters + with details of the fit outcome. Default to ``True``. + return_data_points (bool): Set ``True`` to include in the analysis result + the formatted data points given to the fitter. Default to ``False``. + data_processor (Callable): A callback function to format experiment data. + This can be a :class:`~qiskit_experiments.data_processing.DataProcessor` + instance that defines the `self.__call__` method. + normalization (bool) : Set ``True`` to normalize y values within range [-1, 1]. + Default to ``False``. + p0 (Dict[str, float]): Initial guesses for the fit parameters. + The dictionary is keyed on the fit parameter names. + bounds (Dict[str, Tuple[float, float]]): Boundary of fit parameters. + The dictionary is keyed on the fit parameter names and + values are the tuples of (min, max) of each parameter. + curve_fitter_options (Dict[str, Any]) Options that are passed to the + scipy curve fit which performs the least square fitting on the experiment results. + x_key (str): Circuit metadata key representing a scanned value. + result_parameters (List[Union[str, ParameterRepr]): Parameters reported in the + database as a dedicated entry. 
This is a list of parameter representation + which is either string or ParameterRepr object. If you provide more + information other than name, you can specify + ``[ParameterRepr("alpha", "\u03B1", "a.u.")]`` for example. + The parameter name should be defined in the series definition. + Representation should be printable in standard output, i.e. no latex syntax. + extra (Dict[str, Any]): A dictionary that is appended to all database entries + as extra information. + fixed_parameters (Dict[str, Any]): Fitting model parameters that are fixed + during the curve fitting. This should be provided with default value + keyed on one of the parameter names in the series definition. + """ + options = super()._default_options() + + options.curve_drawer = MplCurveDrawer() + options.plot_raw_data = False + options.plot = True + options.return_fit_parameters = True + options.return_data_points = False + options.data_processor = None + options.normalization = False + options.x_key = "xval" + options.result_parameters = [] + options.extra = {} + options.curve_fitter_options = {} + options.p0 = {} + options.bounds = {} + options.fixed_parameters = {} + + # Set automatic validator for particular option values + options.set_validator(field="data_processor", validator_value=DataProcessor) + options.set_validator(field="curve_drawer", validator_value=BaseCurveDrawer) + + return options + + def set_options(self, **fields): + """Set the analysis options for :meth:`run` method. + + Args: + fields: The fields to update the options + + Raises: + KeyError: When removed option ``curve_fitter`` is set. + """ + # TODO remove this in Qiskit Experiments v0.4 + if "curve_plotter" in fields: + warnings.warn( + "The analysis option 'curve_plotter' has been deprecated. " + "The option is replaced with 'curve_drawer' that takes 'MplCurveDrawer' instance. " + "If this is a loaded analysis, please save this instance again to update option value. 
" + "The 'curve_plotter' argument along with this warning will be removed " + "in Qiskit Experiments 0.4.", + DeprecationWarning, + stacklevel=2, + ) + del fields["curve_plotter"] + + if "curve_fitter" in fields: + warnings.warn( + "Setting curve fitter to analysis options has been deprecated and " + "the option has been removed. The fitter setting is dropped. " + "Now you can directly override '_run_curve_fit' method to apply custom fitter. " + "The `curve_fitter` argument along with this warning will be removed " + "in Qiskit Experiments 0.4.", + DeprecationWarning, + stacklevel=2, + ) + del fields["curve_fitter"] + + # pylint: disable=no-member + draw_options = set(self.drawer.options.__dict__.keys()) | {"style"} + deprecated = draw_options & fields.keys() + if any(deprecated): + warnings.warn( + f"Option(s) {deprecated} have been moved to draw_options and will be removed soon. " + "Use self.drawer.set_options instead. " + "If this is a loaded analysis, please save this instance again to update option value. " + "These arguments along with this warning will be removed " + "in Qiskit Experiments 0.4.", + DeprecationWarning, + stacklevel=2, + ) + draw_options = dict() + for depopt in deprecated: + if depopt == "style": + for k, v in fields.pop("style").items(): + draw_options[k] = v + else: + draw_options[depopt] = fields.pop(depopt) + self.drawer.set_options(**draw_options) + + super().set_options(**fields) + + def _generate_fit_guesses( + self, + user_opt: FitOptions, + curve_data: CurveData, # pylint: disable=unused-argument + ) -> Union[FitOptions, List[FitOptions]]: + """Create algorithmic guess with analysis options and curve data. + + Args: + user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. + + Returns: + List of fit options that are passed to the fitter function. 
+ """ + return user_opt + + def _format_data( + self, + curve_data: CurveData, + ) -> CurveData: + """Postprocessing for the processed dataset. + + Args: + curve_data: Processed dataset created from experiment results. + + Returns: + Formatted data. + """ + # take average over the same x value by keeping sigma + data_allocation, xdata, ydata, sigma, shots = multi_mean_xy_data( + series=curve_data.data_allocation, + xdata=curve_data.x, + ydata=curve_data.y, + sigma=curve_data.y_err, + shots=curve_data.shots, + method="shots_weighted", + ) + + # sort by x value in ascending order + data_allocation, xdata, ydata, sigma, shots = data_sort( + series=data_allocation, + xdata=xdata, + ydata=ydata, + sigma=sigma, + shots=shots, + ) + + return CurveData( + x=xdata, + y=ydata, + y_err=sigma, + shots=shots, + data_allocation=data_allocation, + labels=curve_data.labels, + ) + + def _evaluate_quality( + self, + fit_data: FitData, + ) -> Union[str, None]: + """Evaluate quality of the fit result. + + Args: + fit_data: Fit outcome. + + Returns: + String that represents fit result quality. Usually "good" or "bad". + """ + if fit_data.reduced_chisq < 3.0: + return "good" + return "bad" + + def _run_data_processing( + self, + raw_data: List[Dict], + series: List[SeriesDef], + ) -> CurveData: + """Perform data processing from the experiment result payload. + + Args: + raw_data: Payload in the experiment data. + series: List of series definition defining filtering condition. + + Returns: + Processed data that will be sent to the formatter method. + + Raises: + DataProcessorError: When key for x values is not found in the metadata. + """ + x_key = self.options.x_key + + try: + xdata = np.asarray([datum["metadata"][x_key] for datum in raw_data], dtype=float) + except KeyError as ex: + raise DataProcessorError( + f"X value key {x_key} is not defined in circuit metadata." 
+ ) from ex + + ydata = self.options.data_processor(raw_data) + shots = np.asarray([datum.get("shots", np.nan) for datum in raw_data]) + + def _matched(metadata, **filters): + try: + return all(metadata[key] == val for key, val in filters.items()) + except KeyError: + return False + + data_allocation = np.full(xdata.size, -1, dtype=int) + for sind, series_def in enumerate(series): + matched_inds = np.asarray( + [_matched(d["metadata"], **series_def.filter_kwargs) for d in raw_data], dtype=bool + ) + data_allocation[matched_inds] = sind + + return CurveData( + x=xdata, + y=unp.nominal_values(ydata), + y_err=unp.std_devs(ydata), + shots=shots, + data_allocation=data_allocation, + labels=[s.name for s in series], + ) + + def _run_curve_fit( + self, + curve_data: CurveData, + series: List[SeriesDef], + ) -> Union[None, FitData]: + """Perform curve fitting on given data collection and fit models. + + Args: + curve_data: Formatted data to fit. + series: A list of fit models. + + Returns: + The best fitting outcome with minimum reduced chi-squared value. + """ + # Create a list of initial guess + default_fit_opt = FitOptions( + parameters=self.parameters, + default_p0=self.options.p0, + default_bounds=self.options.bounds, + **self.options.curve_fitter_options, + ) + try: + fit_options = self._generate_fit_guesses(default_fit_opt, curve_data) + except TypeError: + warnings.warn( + "Calling '_generate_fit_guesses' method without curve data has been " + "deprecated and will be prohibited after 0.4. 
" + "Update the method signature of your custom analysis class.", + DeprecationWarning, + ) + # pylint: disable=no-value-for-parameter + fit_options = self._generate_fit_guesses(default_fit_opt) + if isinstance(fit_options, FitOptions): + fit_options = [fit_options] + + # Run fit for each configuration + fit_results = [] + for fit_opt in set(fit_options): + try: + fit_result = multi_curve_fit( + funcs=[sdef.fit_func for sdef in series], + series=curve_data.data_allocation, + xdata=curve_data.x, + ydata=curve_data.y, + sigma=curve_data.y_err, + **fit_opt.options, + ) + fit_results.append(fit_result) + except AnalysisError: + # Some guesses might be too far from the true parameters and may thus fail. + # We ignore initial guesses that fail and continue with the next fit candidate. + pass + + # Find best value with chi-squared value + if len(fit_results) == 0: + warnings.warn( + "All initial guesses and parameter boundaries failed to fit the data. " + "Please provide better initial guesses or fit parameter boundaries.", + UserWarning, + ) + # at least return raw data points rather than terminating + return None + + return sorted(fit_results, key=lambda r: r.reduced_chisq)[0] + + def _create_analysis_results( + self, + fit_data: FitData, + quality: str, + **metadata, + ) -> List[AnalysisResultData]: + """Create analysis results for important fit parameters. + + Args: + fit_data: Fit outcome. + quality: Quality of fit outcome. + + Returns: + List of analysis result data. 
+ """ + outcomes = [] + + # Create entry for all fit parameters + if self.options.return_fit_parameters: + fit_parameters = AnalysisResultData( + name=PARAMS_ENTRY_PREFIX + self.__class__.__name__, + value=[p.nominal_value for p in fit_data.popt], + chisq=fit_data.reduced_chisq, + quality=quality, + extra={ + "popt_keys": fit_data.popt_keys, + "dof": fit_data.dof, + "covariance_mat": fit_data.pcov, + **metadata, + }, + ) + outcomes.append(fit_parameters) + + # Create entries for important parameters + for param_repr in self.options.result_parameters: + if isinstance(param_repr, ParameterRepr): + p_name = param_repr.name + p_repr = param_repr.repr or param_repr.name + unit = param_repr.unit + else: + p_name = param_repr + p_repr = param_repr + unit = None + + fit_val = fit_data.fitval(p_name) + if unit: + par_metadata = metadata.copy() + par_metadata["unit"] = unit + else: + par_metadata = metadata + + outcome = AnalysisResultData( + name=p_repr, + value=fit_val, + chisq=fit_data.reduced_chisq, + quality=quality, + extra=par_metadata, + ) + outcomes.append(outcome) + + return outcomes + + def _create_curve_data( + self, + curve_data: CurveData, + series: List[SeriesDef], + **metadata, + ) -> List[AnalysisResultData]: + """Create analysis results for raw curve data. + + Args: + curve_data: Formatted data that is used for the fitting. + series: List of series definition associated with the curve data. + + Returns: + List of analysis result data. 
+ """ + samples = [] + + if not self.options.return_data_points: + return samples + + for sdef in series: + s_data = curve_data.get_subset_of(sdef.name) + raw_datum = AnalysisResultData( + name=DATA_ENTRY_PREFIX + self.__class__.__name__, + value={ + "xdata": s_data.x, + "ydata": s_data.y, + "sigma": s_data.y_err, + }, + extra={ + "name": sdef.name, + **metadata, + }, + ) + samples.append(raw_datum) + + return samples + + def _initialize( + self, + experiment_data: ExperimentData, + ): + """Initialize curve analysis with experiment data. + + This method is called ahead of other processing. + + Args: + experiment_data: Experiment data to analyze. + """ + # Initialize canvas + if self.options.plot: + self.drawer.initialize_canvas() + + # Initialize data processor + # TODO move this to base analysis in follow-up + data_processor = self.options.data_processor or get_processor(experiment_data, self.options) + + if not data_processor.is_trained: + data_processor.train(data=experiment_data.data()) + self.set_options(data_processor=data_processor) diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py index 581ab95e05..8994995e95 100644 --- a/qiskit_experiments/curve_analysis/curve_analysis.py +++ b/qiskit_experiments/curve_analysis/curve_analysis.py @@ -15,220 +15,30 @@ """ # pylint: disable=invalid-name -import copy import dataclasses import functools -import inspect import warnings -from abc import ABC -from typing import Dict, List, Tuple, Callable, Union, Optional +from typing import Dict, List, Tuple, Union, Optional import numpy as np -import uncertainties -from uncertainties import unumpy as unp - +from uncertainties import unumpy as unp, UFloat from qiskit.utils import detach_prefix -from qiskit_experiments.curve_analysis.curve_data import ( - CurveData, - SeriesDef, - FitData, - ParameterRepr, - FitOptions, -) -from qiskit_experiments.curve_analysis.curve_fit import multi_curve_fit -from 
qiskit_experiments.curve_analysis.data_processing import multi_mean_xy_data, data_sort -from qiskit_experiments.curve_analysis.visualization import MplCurveDrawer, BaseCurveDrawer -from qiskit_experiments.data_processing import DataProcessor -from qiskit_experiments.data_processing.exceptions import DataProcessorError -from qiskit_experiments.data_processing.processor_library import get_processor -from qiskit_experiments.exceptions import AnalysisError -from qiskit_experiments.framework import ( - BaseAnalysis, - ExperimentData, - AnalysisResultData, - Options, - AnalysisConfig, -) - -PARAMS_ENTRY_PREFIX = "@Parameters_" -DATA_ENTRY_PREFIX = "@Data_" - - -class CurveAnalysis(BaseAnalysis, ABC): - """A base class for curve fit type analysis. - - The subclasses can override class attributes to define the behavior of - data extraction and fitting. This docstring describes how code developers can - create a new curve fit analysis subclass inheriting from this base class. - - Class Attributes: - - ``__series__``: A set of data points that will be fit to the same parameters - in the fit function. If this analysis contains multiple curves, - the same number of series definitions should be listed. Each series definition - is a :class:`SeriesDef` element, that may be initialized with - - - ``fit_func``: The function to which the data will be fit. - - ``filter_kwargs``: Circuit metadata key and value associated with this curve. - The data points of the curve are extracted from ExperimentData based on - this information. - - ``name``: Name of the curve. This is arbitrary data field, but should be unique. - - ``plot_color``: String color representation of this series in the plot. - - ``plot_symbol``: String formatter of the scatter of this series in the plot. - - - ``__fixed_parameters__``: A list of parameter names fixed during the fitting. - These parameters should be provided in some way. For example, you can provide - them via experiment options or analysis options. 
Parameter names should be - used in the ``fit_func`` in the series definition. - - See the Examples below for more details. - - - Examples: - - **A fitting for single exponential decay curve** - - In this type of experiment, the analysis deals with a single curve. - Thus filter_kwargs and series name are not necessary defined. - - .. code-block:: - - class AnalysisExample(CurveAnalysis): - - __series__ = [ - SeriesDef( - fit_func=lambda x, p0, p1, p2: - exponential_decay(x, amp=p0, lamb=p1, baseline=p2), - ), - ] - - **A fitting for two exponential decay curve with partly shared parameter** - - In this type of experiment, the analysis deals with two curves. - We need a __series__ definition for each curve, and filter_kwargs should be - properly defined to separate each curve series. - - .. code-block:: - - class AnalysisExample(CurveAnalysis): - - __series__ = [ - SeriesDef( - name="my_experiment1", - fit_func=lambda x, p0, p1, p2, p3: - exponential_decay(x, amp=p0, lamb=p1, baseline=p3), - filter_kwargs={"experiment": 1}, - plot_color="red", - plot_symbol="^", - ), - SeriesDef( - name="my_experiment2", - fit_func=lambda x, p0, p1, p2, p3: - exponential_decay(x, amp=p0, lamb=p2, baseline=p3), - filter_kwargs={"experiment": 2}, - plot_color="blue", - plot_symbol="o", - ), - ] - - In this fit model, we have 4 parameters `p0, p1, p2, p3` and both series share - `p0` and `p3` as `amp` and `baseline` of the `exponential_decay` fit function. - Parameter `p1` (`p2`) is only used by `my_experiment1` (`my_experiment2`). - Both series have same fit function in this example. - - - **A fitting for two trigonometric curves with the same parameter** - - In this type of experiment, the analysis deals with two different curves. - However the parameters are shared with both functions. - - .. 
code-block:: - class AnalysisExample(CurveAnalysis): - - __series__ = [ - SeriesDef( - name="my_experiment1", - fit_func=lambda x, p0, p1, p2, p3: - cos(x, amp=p0, freq=p1, phase=p2, baseline=p3), - filter_kwargs={"experiment": 1}, - plot_color="red", - plot_symbol="^", - ), - SeriesDef( - name="my_experiment2", - fit_func=lambda x, p0, p1, p2, p3: - sin(x, amp=p0, freq=p1, phase=p2, baseline=p3), - filter_kwargs={"experiment": 2}, - plot_color="blue", - plot_symbol="o", - ), - ] - - In this fit model, we have 4 parameters `p0, p1, p2, p3` and both series share - all parameters. However, these series have different fit curves, i.e. - `my_experiment1` (`my_experiment2`) uses the `cos` (`sin`) fit function. - - - **A fitting with fixed parameter** - - In this type of experiment, we can provide fixed fit function parameter. - This parameter should be assigned via analysis options - and not passed to the fitter function. - - .. code-block:: - - class AnalysisExample(CurveAnalysis): - - __series__ = [ - SeriesDef( - fit_func=lambda x, p0, p1, p2: - exponential_decay(x, amp=p0, lamb=p1, baseline=p2), - ), - ] - - __fixed_parameters__ = ["p1"] - - You can add arbitrary number of parameters to the class variable - ``__fixed_parameters__`` from the fit function arguments. - This parameter should be defined with the fit functions otherwise the analysis - instance cannot be created. In above example, parameter ``p1`` should be also - defined in the analysis options. This parameter will be excluded from the fit parameters - and thus will not appear in the analysis result. - - Notes: - This CurveAnalysis class provides several private methods that subclasses can override. - - - Customize pre-data processing: - Override :meth:`~self._format_data`. For example, here you can apply smoothing - to y values, remove outlier, or apply filter function to the data. - By default, data is sorted by x values and the measured values at the same - x value are averaged. 
- - - Create extra data from fit result: - Override :meth:`~self._extra_database_entry`. You need to return a list of - :class:`~qiskit_experiments.framework.analysis_result_data.AnalysisResultData` - object. This returns an empty list by default. +from qiskit_experiments.exceptions import AnalysisError +from qiskit_experiments.framework import ExperimentData, AnalysisResultData, AnalysisConfig +from qiskit_experiments.warnings import deprecated_function - - Customize fit quality evaluation: - Override :meth:`~self._evaluate_quality`. This value will be shown in the - database. You can determine the quality represented by the predefined string - "good" or "bad" based on fit result, - such as parameter uncertainty and reduced chi-squared value. - This returns ``None`` by default. This means evaluation is not performed. +from .base_curve_analysis import BaseCurveAnalysis +from .curve_data import CurveData, SeriesDef - - Customize fitting options: - Override :meth:`~self._generate_fit_guesses`. For example, here you can - calculate initial guess from experiment data and setup fitter options. - See docstring of each method for more details. +class CurveAnalysis(BaseCurveAnalysis): + """Base class for curve analysis with single curve group. - Note that other private methods are not expected to be overridden. - If you forcibly override these methods, the behavior of analysis logic is not well tested - and we cannot guarantee it works as expected (you may suffer from bugs). - Instead, you can open an issue in qiskit-experiment github to upgrade this class - with proper unittest framework. + The fit parameters from the series defined under the analysis class are all shared + and the analysis performs a single multi-objective function optimization. - https://github.com/Qiskit/qiskit-experiments/issues + See :class:`BaseCurveAnalysis` for overridable method documentation. 
""" #: List[SeriesDef]: List of mapping representing a data series @@ -252,11 +62,8 @@ def __init__(self): p: self.options.get(p, None) for p in self.__fixed_parameters__ } - #: List[CurveData]: Processed experiment data set. - self.__processed_data_set = list() - - #: List[int]: Index of physical qubits - self._physical_qubits = None + #: List[CurveData]: Processed experiment data set. For backward compatibility. + self.__processed_data_set = {} @classmethod def _fit_params(cls) -> List[str]: @@ -271,406 +78,39 @@ def _fit_params(cls) -> List[str]: """ fsigs = set() for series_def in cls.__series__: - fsigs.add(inspect.signature(series_def.fit_func)) + fsigs.add(series_def.signature) if len(fsigs) > 1: raise AnalysisError( "Fit functions specified in the series definition have " "different function signature. They should receive " "the same parameter set for multi-objective function fit." ) - - # remove the first function argument. this is usually x, i.e. not a fit parameter. - return list(list(fsigs)[0].parameters.keys())[1:] + return list(next(iter(fsigs))) @property def parameters(self) -> List[str]: """Return parameters of this curve analysis.""" return [s for s in self._fit_params() if s not in self.options.fixed_parameters] - @property - def drawer(self) -> BaseCurveDrawer: - """A short-cut for curve drawer instance.""" - return self._options.curve_plotter - - @classmethod - def _default_options(cls) -> Options: - """Return default analysis options. - - Analysis Options: - curve_plotter (BaseCurveDrawer): A curve drawer instance to visualize - the analysis result. - plot_raw_data (bool): Set ``True`` to draw un-formatted data points on canvas. - This is ``True`` by default. - plot (bool): Set ``True`` to create figure for fit result. - This is ``False`` by default. - curve_fitter (Callable): A callback function to perform fitting with formatted data. - See :func:`~qiskit_experiments.analysis.multi_curve_fit` for example. 
- data_processor (Callable): A callback function to format experiment data. - This can be a :class:`~qiskit_experiments.data_processing.DataProcessor` - instance that defines the `self.__call__` method. - normalization (bool) : Set ``True`` to normalize y values within range [-1, 1]. - p0 (Dict[str, float]): Array-like or dictionary - of initial parameters. - bounds (Dict[str, Tuple[float, float]]): Array-like or dictionary - of (min, max) tuple of fit parameter boundaries. - x_key (str): Circuit metadata key representing a scanned value. - result_parameters (List[Union[str, ParameterRepr]): Parameters reported in the - database as a dedicated entry. This is a list of parameter representation - which is either string or ParameterRepr object. If you provide more - information other than name, you can specify - ``[ParameterRepr("alpha", "\u03B1", "a.u.")]`` for example. - The parameter name should be defined in the series definition. - Representation should be printable in standard output, i.e. no latex syntax. - return_data_points (bool): Set ``True`` to return formatted XY data. - extra (Dict[str, Any]): A dictionary that is appended to all database entries - as extra information. - curve_fitter_options (Dict[str, Any]) Options that are passed to the - specified curve fitting function. - fixed_parameters (Dict[str, Any]): Fitting model parameters that are fixed - during the curve fitting. This should be provided with default value - keyed on one of the parameter names in the series definition. 
- """ - options = super()._default_options() - - options.curve_plotter = MplCurveDrawer() - options.plot_raw_data = False - options.plot = True - options.curve_fitter = multi_curve_fit - options.data_processor = None - options.normalization = False - options.x_key = "xval" - options.result_parameters = None - options.return_data_points = False - options.extra = dict() - options.curve_fitter_options = dict() - options.p0 = {} - options.bounds = {} - options.fixed_parameters = {} - - return options - - def set_options(self, **fields): - """Set the analysis options for :meth:`run` method. - - Args: - fields: The fields to update the options - - Raises: - KeyError: When removed option ``curve_fitter`` is set. - TypeError: When invalid drawer instance is provided. - """ - # TODO remove this in Qiskit Experiments v0.4 - if "curve_plotter" in fields and isinstance(fields["curve_plotter"], str): - plotter_str = fields["curve_plotter"] - warnings.warn( - f"The curve plotter '{plotter_str}' has been deprecated. " - "The option is replaced with 'MplCurveDrawer' instance. " - "If this is a loaded analysis, please save this instance again to update option value. " - "This warning will be removed with backport in Qiskit Experiments 0.4.", - DeprecationWarning, - stacklevel=2, - ) - fields["curve_plotter"] = MplCurveDrawer() - - if "curve_plotter" in fields and not isinstance(fields["curve_plotter"], BaseCurveDrawer): - plotter_obj = fields["curve_plotter"] - raise TypeError( - f"'{plotter_obj.__class__.__name__}' object is not valid curve drawer instance." - ) - - # pylint: disable=no-member - draw_options = set(self.drawer.options.__dict__.keys()) | {"style"} - deprecated = draw_options & fields.keys() - if any(deprecated): - warnings.warn( - f"Option(s) {deprecated} have been moved to draw_options and will be removed soon. " - "Use self.drawer.set_options instead. " - "If this is a loaded analysis, please save this instance again to update option value. 
" - "This warning will be removed with backport in Qiskit Experiments 0.4.", - DeprecationWarning, - stacklevel=2, - ) - draw_options = dict() - for depopt in deprecated: - if depopt == "style": - for k, v in fields.pop("style").items(): - draw_options[k] = v - else: - draw_options[depopt] = fields.pop(depopt) - self.drawer.set_options(**draw_options) - - super().set_options(**fields) - - def _generate_fit_guesses(self, user_opt: FitOptions) -> Union[FitOptions, List[FitOptions]]: - """Create algorithmic guess with analysis options and curve data. - - Subclasses can override this method. - - Subclass can access to the curve data with ``self._data()`` method. - If there are multiple series, you can get a specific series by specifying ``series_name``. - This method returns a ``CurveData`` instance, which is the `dataclass` - containing x values `.x`, y values `.y`, and sigma values `.y_err`. - - Subclasses can also access the defined analysis options with the ``self._get_option``. - For example: - - .. code-block:: - - curve_data = self._data(series_name="my_experiment1") - - if self._get_option("my_option1") == "abc": - param_a_guess = my_guess_function(curve_data.x, curve_data.y, ...) - else: - param_a_guess = ... - - user_opt.p0.set_if_empty(param_a=param_a_guess) - - Note that this subroutine can generate multiple fit options. - If multiple options are provided, the fitter will run multiple times, - i.e. once for each fit option. - The result with the best reduced chi-squared value is kept. - - Note that the argument ``user_opt`` is a collection of fitting options (initial guesses, - boundaries, and extra fitter options) with the user-provided guesses and boundaries. - The method :meth:`set_if_empty` sets the value of specified parameters of the fit options - dictionary only if the values of these parameters have not yet been assigned. - - .. 
code-block:: - - opt1 = user_opt.copy() - opt1.p0.set_if_empty(param_a=3) - - opt2 = user_opt.copy() - opt2.p0.set_if_empty(param_a=4) - - return [opt1, opt2] - - Note that you can also change fitter options (not only initial guesses and boundaries) - in each fit options with :meth:`add_extra_options` method. - This might be convenient to run fitting with multiple fit algorithms - or different fitting options. By default, this class uses `scipy.curve_fit` - as the fitter function. See Scipy API docs for more fitting option details. - See also :py:class:`qiskit_experiments.curve_analysis.curve_data.FitOptions` - for the behavior of the fit option instance. - - The final fit parameters are decided with the following procedure. - - 1. :class:`FitOptions` object is initialized with user options. - - 2. Algorithmic guess is generated here and override the default fit options object. - - 3. A list of fit options is returned. - - 4. Duplicated entries are eliminated. - - 5. The fitter optimizes parameters with unique fit options and outputs the chisq value. - - 6. The best fit is selected based on the minimum chisq. - - Note that in this method you don't need to worry about the user provided initial guesses - and boundaries. These values are already assigned in the ``user_opts``. - - Args: - user_opt: Fit options filled with user provided guess and bounds. - - Returns: - List of fit options that are passed to the fitter function. - """ - - return user_opt - - def _format_data(self, data: CurveData) -> CurveData: - """An optional subroutine to perform data pre-processing. - - Subclasses can override this method to apply pre-precessing to data values to fit. - - For example, - - - Apply smoothing to y values to deal with noisy observed values - - Remove redundant data points (outlier) - - Apply frequency filter function - - etc... - - By default, the analysis just takes average over the same x values and sort - data index by the x values in ascending order. - - .. 
note:: - - The data returned by this method should have the label "fit_ready". - - Returns: - Formatted CurveData instance. - """ - # take average over the same x value by keeping sigma - series, xdata, ydata, sigma, shots = multi_mean_xy_data( - series=data.data_index, - xdata=data.x, - ydata=data.y, - sigma=data.y_err, - shots=data.shots, - method="shots_weighted", - ) - - # sort by x value in ascending order - series, xdata, ydata, sigma, shots = data_sort( - series=series, - xdata=xdata, - ydata=ydata, - sigma=sigma, - shots=shots, - ) - - return CurveData( - label="fit_ready", - x=xdata, - y=ydata, - y_err=sigma, - shots=shots, - data_index=series, - ) - - # pylint: disable=unused-argument - def _extra_database_entry(self, fit_data: FitData) -> List[AnalysisResultData]: - """Calculate new quantity from the fit result. - - Subclasses can override this method to do post analysis. - - Args: - fit_data: Fit result. - - Returns: - List of database entry created from the fit data. - """ - return [] - - def _post_process_fit_result(self, fit_result: FitData) -> FitData: - """A hook that sub-classes can override to manipulate the result of the fit. - - Args: - fit_result: A result from the fitting. - - Returns: - A fit result that might be post-processed. - """ - return fit_result - - # pylint: disable=unused-argument - def _evaluate_quality(self, fit_data: FitData) -> Union[str, None]: - """Evaluate quality of the fit result. - - Subclasses can override this method to do post analysis. - - Args: - fit_data: Fit result. - - Returns: - String that represents fit result quality. Usually "good" or "bad". - """ - return None - - def _extract_curves( - self, experiment_data: ExperimentData, data_processor: Union[Callable, DataProcessor] - ): - """Extract curve data from experiment data. - - This method internally populates two types of curve data. - - - raw_data: - - This is the data directly obtained from the experiment data. 
- You can access this data with ``self._data(label="raw_data")``. - - - fit_ready: - - This is the formatted data created by pre-processing defined by - `self._format_data()` method. This method is implemented by subclasses. - You can access to this data with ``self._data(label="fit_ready")``. - - If multiple series exist, you can optionally specify ``series_name`` in - ``self._data`` method to filter data in the target series. - - .. notes:: - The target metadata properties to define each curve entry is described by - the class attribute __series__ (see `filter_kwargs`). - - Args: - experiment_data: ExperimentData object to fit parameters. - data_processor: A callable or DataProcessor instance to format data into numpy array. - This should take a list of dictionaries and return two tuple of float values, - that represent a y value and an error of it. - Raises: - DataProcessorError: When `x_key` specified in the analysis option is not - defined in the circuit metadata. - AnalysisError: When formatted data has label other than fit_ready. - """ - self.__processed_data_set = list() - - def _is_target_series(datum, **filters): - try: - return all(datum["metadata"][key] == val for key, val in filters.items()) - except KeyError: - return False - - # Extract X, Y, Y_sigma data - data = experiment_data.data() - - x_key = self.options.x_key - try: - xdata = np.asarray([datum["metadata"][x_key] for datum in data], dtype=float) - except KeyError as ex: - raise DataProcessorError( - f"X value key {x_key} is not defined in circuit metadata." 
- ) from ex - - if isinstance(data_processor, DataProcessor): - ydata = data_processor(data) - else: - y_nominals, y_stderrs = zip(*map(data_processor, data)) - ydata = unp.uarray(y_nominals, y_stderrs) - - # Store metadata - metadata = np.asarray([datum["metadata"] for datum in data], dtype=object) - - # Store shots - shots = np.asarray([datum.get("shots", np.nan) for datum in data]) - - # Find series (invalid data is labeled as -1) - data_index = np.full(xdata.size, -1, dtype=int) - for idx, series_def in enumerate(self.__series__): - data_matched = np.asarray( - [_is_target_series(datum, **series_def.filter_kwargs) for datum in data], dtype=bool - ) - data_index[data_matched] = idx - - # Store raw data - raw_data = CurveData( - label="raw_data", - x=xdata, - y=unp.nominal_values(ydata), - y_err=unp.std_devs(ydata), - shots=shots, - data_index=data_index, - metadata=metadata, - ) - self.__processed_data_set.append(raw_data) - - # Format raw data - formatted_data = self._format_data(raw_data) - if formatted_data.label != "fit_ready": - raise AnalysisError(f"Not expected data label {formatted_data.label} != fit_ready.") - self.__processed_data_set.append(formatted_data) - + # pylint: disable=bad-docstring-quotes + @deprecated_function( + last_version="0.4", + msg=( + "CurveAnalysis will also drop internal cache of processed data after 0.4. " + "Relevant method signature has been updated to directly receive curve data " + "rather than accessing data with this method." + ), + ) def _data( self, series_name: Optional[str] = None, label: Optional[str] = "fit_ready", ) -> CurveData: - """Getter for experiment data set. + """Deprecated. Getter for experiment data set. Args: series_name: Series name to search for. - label: Label attached to data set. By default it returns "fit_ready" data. + label: Label attached to data set. By default, it returns "fit_ready" data. Returns: Filtered curve data set. 
@@ -678,45 +118,22 @@ def _data( Raises: AnalysisError: When requested series or label are not defined. """ - # pylint: disable = undefined-loop-variable - for data in self.__processed_data_set: - if data.label == label: - break - else: - raise AnalysisError(f"Requested data with label {label} does not exist.") + try: + data = self.__processed_data_set[label] + except KeyError as ex: + raise AnalysisError(f"Requested data with label {label} does not exist.") from ex if series_name is None: return data - - for idx, series_def in enumerate(self.__series__): - if series_def.name == series_name: - locs = data.data_index == idx - return CurveData( - label=label, - x=data.x[locs], - y=data.y[locs], - y_err=data.y_err[locs], - shots=data.shots[locs], - data_index=idx, - metadata=data.metadata[locs] if data.metadata is not None else None, - ) - - raise AnalysisError(f"Specified series {series_name} is not defined in this analysis.") - - @property - def _num_qubits(self) -> int: - return len(self._physical_qubits) + return data.get_subset_of(series_name) def _run_analysis( self, experiment_data: ExperimentData ) -> Tuple[List[AnalysisResultData], List["pyplot.Figure"]]: - # - # 1. Parse arguments - # # Update all fit functions in the series definitions if fixed parameter is defined. + # These lines will be removed once proper fit model class is implemented. assigned_params = self.options.fixed_parameters - if assigned_params: # Check if all parameters are assigned. if any(v is None for v in assigned_params.values()): @@ -726,7 +143,6 @@ def _run_analysis( f"All values of fixed-parameters, i.e. {assigned_params}, " "must be provided by the analysis options to run this analysis." ) - # Override series definition with assigned fit functions. 
assigned_series = [] for series_def in self.__series__: @@ -736,244 +152,115 @@ def _run_analysis( assigned_series.append(SeriesDef(**dict_def)) self.__series__ = assigned_series - # get experiment metadata - try: - self._physical_qubits = experiment_data.metadata["physical_qubits"] - except KeyError: - pass - - # - # 2. Setup data processor - # - - # If no data processor was provided at run-time we infer one from the job - # metadata and default to the data processor for averaged classified data. - data_processor = self.options.data_processor - - if not data_processor: - data_processor = get_processor(experiment_data, self.options) - - if isinstance(data_processor, DataProcessor) and not data_processor.is_trained: - # Qiskit DataProcessor instance. May need calibration. - data_processor.train(data=experiment_data.data()) - - # Initialize fit figure canvas - if self.options.plot: - self.drawer.initialize_canvas() - - # - # 3. Extract curve entries from experiment data - # - self._extract_curves(experiment_data=experiment_data, data_processor=data_processor) + # Prepare for fitting + self._initialize(experiment_data) + analysis_results = [] - # TODO remove _data method dependency in follow-up - # self.__processed_data_set will be removed from instance. + # Run data processing + processed_data = self._run_data_processing(experiment_data.data(), self.__series__) - # Draw raw data if self.options.plot and self.options.plot_raw_data: for s in self.__series__: - raw_data = self._data(label="raw_data", series_name=s.name) + sub_data = processed_data.get_subset_of(s.name) self.drawer.draw_raw_data( - x_data=raw_data.x, - y_data=raw_data.y, + x_data=sub_data.x, + y_data=sub_data.y, ax_index=s.canvas, ) + # for backward compatibility, will be removed in 0.4. 
+ self.__processed_data_set["raw_data"] = processed_data - # Draw formatted data + # Format data + formatted_data = self._format_data(processed_data) if self.options.plot: for s in self.__series__: - curve_data = self._data(label="fit_ready", series_name=s.name) self.drawer.draw_formatted_data( - x_data=curve_data.x, - y_data=curve_data.y, - y_err_data=curve_data.y_err, + x_data=formatted_data.x, + y_data=formatted_data.y, + y_err_data=formatted_data.y_err, name=s.name, ax_index=s.canvas, color=s.plot_color, marker=s.plot_symbol, ) + # for backward compatibility, will be removed in 0.4. + self.__processed_data_set["fit_ready"] = formatted_data - # - # 4. Run fitting - # - formatted_data = self._data(label="fit_ready") - - # Generate algorithmic initial guesses and boundaries - default_fit_opt = FitOptions( - parameters=self.parameters, - default_p0=self.options.p0, - default_bounds=self.options.bounds, - **self.options.curve_fitter_options, - ) - - fit_options = self._generate_fit_guesses(default_fit_opt) - if isinstance(fit_options, FitOptions): - fit_options = [fit_options] - - # Run fit for each configuration - fit_results = [] - for fit_opt in set(fit_options): - try: - fit_result = self.options.curve_fitter( - funcs=[series_def.fit_func for series_def in self.__series__], - series=formatted_data.data_index, - xdata=formatted_data.x, - ydata=formatted_data.y, - sigma=formatted_data.y_err, - **fit_opt.options, - ) - fit_results.append(fit_result) - except AnalysisError: - # Some guesses might be too far from the true parameters and may thus fail. - # We ignore initial guesses that fail and continue with the next fit candidate. - pass - - # Find best value with chi-squared value - if len(fit_results) == 0: - warnings.warn( - "All initial guesses and parameter boundaries failed to fit the data. 
" - "Please provide better initial guesses or fit parameter boundaries.", - UserWarning, - ) - # at least return raw data points rather than terminating - fit_result = None - else: - fit_result = sorted(fit_results, key=lambda r: r.reduced_chisq)[0] - fit_result = self._post_process_fit_result(fit_result) + # Run fitting + fit_data = self._run_curve_fit(formatted_data, self.__series__) - # - # 5. Create database entry - # - analysis_results = [] - if fit_result: - # pylint: disable=assignment-from-none - quality = self._evaluate_quality(fit_data=fit_result) - - fit_models = { - series_def.name: series_def.model_description or "no description" - for series_def in self.__series__ + # Create figure and result data + if fit_data: + metadata = self.options.extra.copy() + metadata["fit_models"] = { + s.name: s.model_description or "no description" for s in self.__series__ } - - # overview entry - analysis_results.append( - AnalysisResultData( - name=PARAMS_ENTRY_PREFIX + self.__class__.__name__, - value=[p.nominal_value for p in fit_result.popt], - chisq=fit_result.reduced_chisq, - quality=quality, - extra={ - "popt_keys": fit_result.popt_keys, - "dof": fit_result.dof, - "covariance_mat": fit_result.pcov, - "fit_models": fit_models, - **self.options.extra, - }, + quality = self._evaluate_quality(fit_data) + + # Create analysis results + analysis_results.extend(self._create_analysis_results(fit_data, quality, **metadata)) + # calling old extra entry method for backward compatibility + if hasattr(self, "_extra_database_entry"): + warnings.warn( + "Method '_extra_database_entry' has been deprecated and will be " + "removed after 0.4. 
Please override new method " + "'_create_analysis_results' with updated method signature.", + DeprecationWarning, ) - ) - - # output special parameters - result_parameters = self.options.result_parameters - if result_parameters: - for param_repr in result_parameters: - if isinstance(param_repr, ParameterRepr): - p_name = param_repr.name - p_repr = param_repr.repr or param_repr.name - unit = param_repr.unit - else: - p_name = param_repr - p_repr = param_repr - unit = None - - fit_val = fit_result.fitval(p_name) - if unit: - metadata = copy.copy(self.options.extra) - metadata["unit"] = unit - else: - metadata = self.options.extra - - result_entry = AnalysisResultData( - name=p_repr, - value=fit_val, - chisq=fit_result.reduced_chisq, - quality=quality, - extra=metadata, + deprecated_method = getattr(self, "_extra_database_entry") + analysis_results.extend(deprecated_method(fit_data)) + + # Draw fit curves and report + if self.options.plot: + for s in self.__series__: + interp_x = np.linspace(*fit_data.x_range, 100) + + params = {} + for fitpar in s.signature: + if fitpar in self.options.fixed_parameters: + params[fitpar] = self.options.fixed_parameters[fitpar] + else: + params[fitpar] = fit_data.fitval(fitpar) + + y_data_with_uncertainty = s.fit_func(interp_x, **params) + y_mean = unp.nominal_values(y_data_with_uncertainty) + y_std = unp.std_devs(y_data_with_uncertainty) + # Draw fit line + self.drawer.draw_fit_line( + x_data=interp_x, + y_data=y_mean, + ax_index=s.canvas, + color=s.plot_color, ) - analysis_results.append(result_entry) - - # add extra database entries - analysis_results.extend(self._extra_database_entry(fit_result)) - - if self.options.return_data_points: - # save raw data points in the data base if option is set (default to false) - raw_data_dict = dict() - for series_def in self.__series__: - series_data = self._data(series_name=series_def.name, label="raw_data") - raw_data_dict[series_def.name] = { - "xdata": series_data.x, - "ydata": 
series_data.y, - "sigma": series_data.y_err, - } - raw_data_entry = AnalysisResultData( - name=DATA_ENTRY_PREFIX + self.__class__.__name__, - value=raw_data_dict, - extra={ - "x-unit": self.drawer.options.xval_unit, - "y-unit": self.drawer.options.yval_unit, - }, - ) - analysis_results.append(raw_data_entry) - - # Draw fit results if fitting succeeded - if self.options.plot and fit_result: - for s in self.__series__: - interp_x = np.linspace(*fit_result.x_range, 100) - - params = {} - for fitpar in s.signature: - if fitpar in self.options.fixed_parameters: - params[fitpar] = self.options.fixed_parameters[fitpar] - else: - params[fitpar] = fit_result.fitval(fitpar) - - y_data_with_uncertainty = s.fit_func(interp_x, **params) - y_mean = unp.nominal_values(y_data_with_uncertainty) - y_std = unp.std_devs(y_data_with_uncertainty) - # Draw fit line - self.drawer.draw_fit_line( - x_data=interp_x, - y_data=y_mean, - ax_index=s.canvas, - color=s.plot_color, - ) - # Draw confidence intervals with different n_sigma - sigmas = unp.std_devs(y_data_with_uncertainty) - if np.isfinite(sigmas).all(): - for n_sigma, alpha in self.drawer.options.plot_sigma: - self.drawer.draw_confidence_interval( - x_data=interp_x, - y_ub=y_mean + n_sigma * y_std, - y_lb=y_mean - n_sigma * y_std, - ax_index=s.canvas, - alpha=alpha, - color=s.plot_color, - ) - - # Draw fitting report - report_description = "" - for res in analysis_results: - if isinstance(res.value, (float, uncertainties.UFloat)): - report_description += f"{analysis_result_to_repr(res)}\n" - report_description += r"Fit $\chi^2$ = " + f"{fit_result.reduced_chisq: .4g}" - self.drawer.draw_fit_report(description=report_description) - - # Output figure + # Draw confidence intervals with different n_sigma + sigmas = unp.std_devs(y_data_with_uncertainty) + if np.isfinite(sigmas).all(): + for n_sigma, alpha in self.drawer.options.plot_sigma: + self.drawer.draw_confidence_interval( + x_data=interp_x, + y_ub=y_mean + n_sigma * y_std, + 
y_lb=y_mean - n_sigma * y_std, + ax_index=s.canvas, + alpha=alpha, + color=s.plot_color, + ) + # Write fitting report + report_description = "" + for res in analysis_results: + if isinstance(res.value, (float, UFloat)): + report_description += f"{analysis_result_to_repr(res)}\n" + report_description += r"Fit $\chi^2$ = " + f"{fit_data.reduced_chisq: .4g}" + self.drawer.draw_fit_report(description=report_description) + + # Add raw data points + analysis_results.extend(self._create_curve_data(formatted_data, self.__series__)) + + # Finalize plot if self.options.plot: self.drawer.format_canvas() - figures = [self.drawer.figure] - else: - figures = [] + return analysis_results, [self.drawer.figure] - return analysis_results, figures + return analysis_results, [] @classmethod def from_config(cls, config: Union[AnalysisConfig, Dict]) -> "CurveAnalysis": @@ -1006,7 +293,7 @@ def from_config(cls, config: Union[AnalysisConfig, Dict]) -> "CurveAnalysis": def is_error_not_significant( - val: Union[float, uncertainties.UFloat], + val: Union[float, UFloat], fraction: float = 1.0, absolute: Optional[float] = None, ) -> bool: @@ -1040,9 +327,12 @@ def analysis_result_to_repr(result: AnalysisResultData) -> str: Returns: String representation of the data. + + Raises: + AnalysisError: When the result data is not likely fit parameter. 
""" - if not isinstance(result.value, (float, uncertainties.UFloat)): - return AnalysisError(f"Result data {result.name} is not a valid fit parameter data type.") + if not isinstance(result.value, (float, UFloat)): + raise AnalysisError(f"Result data {result.name} is not a valid fit parameter data type.") unit = result.extra.get("unit", None) diff --git a/qiskit_experiments/curve_analysis/curve_data.py b/qiskit_experiments/curve_analysis/curve_data.py index 861e83dde9..32e56b50cb 100644 --- a/qiskit_experiments/curve_analysis/curve_data.py +++ b/qiskit_experiments/curve_analysis/curve_data.py @@ -25,31 +25,39 @@ @dataclasses.dataclass(frozen=True) class SeriesDef: - """Description of curve.""" + """A dataclass to describe the definition of the curve. + + Attributes: + fit_func: A callable that defines the fit model of this curve. The argument names + in the callable are parsed to create the fit parameter list, which will appear + in the analysis results. The first argument should be ``x`` that represents + X-values that the experiment sweeps. + filter_kwargs: Optional. Dictionary of properties that uniquely identifies this series. + This dictionary is used for data processing. + This must be provided when the curve analysis consists of multiple series. + name: Optional. Name of this series. + plot_color: Optional. String representation of the color that is used to draw fit data + and data points in the output figure. This depends on the drawer class + being set to the curve analysis options. Usually this conforms to the + Matplotlib color names. + plot_symbol: Optional. String representation of the marker shape that is used to draw + data points in the output figure. This depends on the drawer class + being set to the curve analysis options. Usually this conforms to the + Matplotlib symbol names. + canvas: Optional. Index of sub-axis in the output figure that draws this curve. + This option is valid only when the drawer instance provides multi-axis drawing. 
+ model_description: Optional. Arbitrary string representation of this fit model. + This string will appear in the analysis results as a part of metadata. + """ - # Arbitrary callback to define the fit function. First argument should be x. fit_func: Callable - - # Keyword dictionary to define the series with circuit metadata filter_kwargs: Dict[str, Any] = dataclasses.field(default_factory=dict) - - # Name of this series. This name will appear in the figure and raw x-y value report. name: str = "Series-0" - - # Color of this line. plot_color: str = "black" - - # Symbol to represent data points of this line. plot_symbol: str = "o" - - # Latex description of this fit model - model_description: Optional[str] = None - - # Index of canvas if the result figure is multi-panel canvas: Optional[int] = None - - # Automatically extracted signature of the fit function - signature: List[str] = dataclasses.field(init=False) + model_description: Optional[str] = None + signature: Tuple[str, ...] = dataclasses.field(init=False) def __post_init__(self): """Parse the fit function signature to extract the names of the variables. @@ -57,7 +65,7 @@ def __post_init__(self): Fit functions take arguments F(x, p0, p1, p2, ...) thus the first value should be excluded. """ signature = list(inspect.signature(self.fit_func).parameters.keys()) - fitparams = signature[1:] + fitparams = tuple(signature[1:]) # Note that this dataclass is frozen object.__setattr__(self, "signature", fitparams) @@ -65,54 +73,90 @@ def __post_init__(self): @dataclasses.dataclass(frozen=True) class CurveData: - """Set of extracted experiment data.""" - - # Name of this data set - label: str + """A dataclass that manages the multiple arrays comprising the dataset for fitting. + + This dataset can consist of X, Y values from multiple series. + To extract curve data of the particular series, :meth:`get_subset_of` can be used. + + Attributes: + x: X-values that experiment sweeps. 
+ y: Y-values that observed and processed by the data processor. + y_err: Uncertainty of the Y-values which is created by the data processor. + Usually this assumes standard error. + shots: Number of shots used in the experiment to obtain the Y-values. + data_allocation: List with identical size with other arrays. + The value indicates the series index of the corresponding element. + This is classified based upon the matching of :attr:`SeriesDef.filter_kwargs` + with the circuit metadata of the corresponding data index. + If metadata doesn't match with any series definition, element is filled with ``-1``. + labels: List of curve labels. The list index corresponds to the series index. + """ - # X data x: np.ndarray - - # Y data (measured data) y: np.ndarray - - # Error bar y_err: np.ndarray - - # Shots number shots: np.ndarray + data_allocation: np.ndarray + labels: List[str] - # Maping of data index to series index - data_index: Union[np.ndarray, int] + def get_subset_of(self, index: Union[str, int]) -> "CurveData": + """Filter data by series name or index. - # Metadata associated with each data point. Generated from the circuit metadata. - metadata: np.ndarray = None + Args: + index: Series index of name. + + Returns: + A subset of data corresponding to a particular series. + """ + if isinstance(index, int): + _index = index + _name = self.labels[index] + else: + _index = self.labels.index(index) + _name = index + + locs = self.data_allocation == _index + return CurveData( + x=self.x[locs], + y=self.y[locs], + y_err=self.y_err[locs], + shots=self.shots[locs], + data_allocation=np.full(np.count_nonzero(locs), _index), + labels=[_name], + ) @dataclasses.dataclass(frozen=True) class FitData: - """Set of data generated by the fit function.""" + """A dataclass to store the outcome of the fitting. + + Attributes: + popt: List of optimal parameter values with uncertainties if available. + popt_keys: List of parameter names being fit. 
+ pcov: Covariance matrix from the least square fitting. + reduced_chisq: Reduced Chi-squared value for the fit curve. + dof: Degree of freedom in this fit model. + x_data: X-values provided to the fitter. + y_data: Y-values provided to the fitter. + """ - # Order sensitive fit parameter values popt: List[uncertainties.UFloat] - - # Order sensitive parameter name list popt_keys: List[str] - - # Covariance matrix pcov: np.ndarray - - # Reduced Chi-squared value of fit curve reduced_chisq: float - - # Degree of freedom dof: int + x_data: np.ndarray + y_data: np.ndarray - # X data range - x_range: Tuple[float, float] + @property + def x_range(self) -> Tuple[float, float]: + """Range of x values.""" + return np.min(self.x_data), np.max(self.x_data) - # Y data range - y_range: Tuple[float, float] + @property + def y_range(self) -> Tuple[float, float]: + """Range of y values.""" + return np.min(self.y_data), np.max(self.y_data) def fitval(self, key: str) -> uncertainties.UFloat: """A helper method to get fit value object from parameter key name. @@ -136,7 +180,13 @@ def fitval(self, key: str) -> uncertainties.UFloat: @dataclasses.dataclass class ParameterRepr: - """Detailed description of fitting parameter.""" + """Detailed description of fitting parameter. + + Attributes: + name: Original name of the fit parameter being defined in the fit model. + repr: Optional. Human-readable parameter name shown in the analysis result and in the figure. + unit: Optional. Physical unit of this parameter if applicable. 
+ """ # Fitter argument name name: str diff --git a/qiskit_experiments/curve_analysis/curve_fit.py b/qiskit_experiments/curve_analysis/curve_fit.py index 9b8c948119..93c2925c1e 100644 --- a/qiskit_experiments/curve_analysis/curve_fit.py +++ b/qiskit_experiments/curve_analysis/curve_fit.py @@ -155,18 +155,14 @@ def fit_func(x, *params): residues = residues / (sigma**2) reduced_chisq = np.sum(residues) / dof - # Compute data range for fit - xdata_range = np.min(xdata), np.max(xdata) - ydata_range = np.min(ydata), np.max(ydata) - return FitData( popt=list(fit_params), popt_keys=list(param_keys), pcov=pcov, reduced_chisq=reduced_chisq, dof=dof, - x_range=xdata_range, - y_range=ydata_range, + x_data=xdata, + y_data=ydata, ) diff --git a/qiskit_experiments/curve_analysis/standard_analysis/decay.py b/qiskit_experiments/curve_analysis/standard_analysis/decay.py index 740384ee66..771b5e4846 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/decay.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/decay.py @@ -61,21 +61,19 @@ class DecayAnalysis(curve.CurveAnalysis): ] def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. - - Raises: - AnalysisError: When the y data is likely constant. 
""" - curve_data = self._data() - user_opt.p0.set_if_empty(base=curve.guess.min_height(curve_data.y)[0]) alpha = curve.guess.exp_decay(curve_data.x, curve_data.y) diff --git a/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py b/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py index 0e8749bcfe..a06af6d50c 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py @@ -76,9 +76,6 @@ class ErrorAmplificationAnalysis(curve.CurveAnalysis): often correspond to symmetry points of the fit function. Furthermore, this type of analysis is intended for values of :math:`d\theta` close to zero. - # section: note - - Different analysis classes may subclass this class to fix some of the fit parameters. """ __series__ = [ @@ -109,7 +106,7 @@ def _default_options(cls): considered as good. Defaults to :math:`\pi/2`. """ default_options = super()._default_options() - default_options.curve_plotter.set_options( + default_options.curve_drawer.set_options( xlabel="Number of gates (n)", ylabel="Population", ylim=(0, 1.0), @@ -120,22 +117,21 @@ def _default_options(cls): return default_options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. - - Raises: - CalibrationError: When ``angle_per_gate`` is missing. 
""" fixed_params = self.options.fixed_parameters - curve_data = self._data() max_abs_y, _ = curve.guess.max_height(curve_data.y, absolute=True) max_y, min_y = np.max(curve_data.y), np.min(curve_data.y) diff --git a/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py b/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py index dd56293391..2bf57f1c35 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py @@ -71,7 +71,7 @@ class GaussianAnalysis(curve.CurveAnalysis): @classmethod def _default_options(cls) -> Options: options = super()._default_options() - options.curve_plotter.set_options( + options.curve_drawer.set_options( xlabel="Frequency", ylabel="Signal (arb. units)", xval_unit="Hz", @@ -81,17 +81,19 @@ def _default_options(cls) -> Options: return options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. """ - curve_data = self._data() max_abs_y, _ = curve.guess.max_height(curve_data.y, absolute=True) user_opt.bounds.set_if_empty( @@ -128,22 +130,18 @@ def _evaluate_quality(self, fit_data: curve.FitData) -> Union[str, None]: threshold of two, and - a standard error on the sigma of the Gaussian that is smaller than the sigma. 
""" - curve_data = self._data() - - max_freq = np.max(curve_data.x) - min_freq = np.min(curve_data.x) - freq_increment = np.mean(np.diff(curve_data.x)) + freq_increment = np.mean(np.diff(fit_data.x_data)) fit_a = fit_data.fitval("a") fit_b = fit_data.fitval("b") fit_freq = fit_data.fitval("freq") fit_sigma = fit_data.fitval("sigma") - snr = abs(fit_a.n) / np.sqrt(abs(np.median(curve_data.y) - fit_b.n)) - fit_width_ratio = fit_sigma.n / (max_freq - min_freq) + snr = abs(fit_a.n) / np.sqrt(abs(np.median(fit_data.y_data) - fit_b.n)) + fit_width_ratio = fit_sigma.n / np.ptp(fit_data.x_data) criteria = [ - min_freq <= fit_freq.n <= max_freq, + fit_data.x_range[0] <= fit_freq.n <= fit_data.x_range[1], 1.5 * freq_increment < fit_sigma.n, fit_width_ratio < 0.25, fit_data.reduced_chisq < 3, diff --git a/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py b/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py index 578f100747..2502da8a95 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py @@ -66,17 +66,19 @@ class OscillationAnalysis(curve.CurveAnalysis): ] def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. 
""" - curve_data = self._data() max_abs_y, _ = curve.guess.max_height(curve_data.y, absolute=True) user_opt.bounds.set_if_empty( @@ -182,18 +184,19 @@ class DumpedOscillationAnalysis(curve.CurveAnalysis): ] def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. """ - curve_data = self._data() - user_opt.p0.set_if_empty( amp=0.5, base=curve.guess.constant_sinusoidal_offset(curve_data.y), diff --git a/qiskit_experiments/curve_analysis/standard_analysis/resonance.py b/qiskit_experiments/curve_analysis/standard_analysis/resonance.py index 7c45f88437..9f3a2cc9b4 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/resonance.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/resonance.py @@ -71,7 +71,7 @@ class ResonanceAnalysis(curve.CurveAnalysis): @classmethod def _default_options(cls) -> Options: options = super()._default_options() - options.curve_plotter.set_options( + options.curve_drawer.set_options( xlabel="Frequency", ylabel="Signal (arb. units)", xval_unit="Hz", @@ -81,17 +81,19 @@ def _default_options(cls) -> Options: return options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. 
""" - curve_data = self._data() max_abs_y, _ = curve.guess.max_height(curve_data.y, absolute=True) user_opt.bounds.set_if_empty( @@ -128,22 +130,18 @@ def _evaluate_quality(self, fit_data: curve.FitData) -> Union[str, None]: threshold of two, and - a standard error on the kappa of the Lorentzian that is smaller than the kappa. """ - curve_data = self._data() - - max_freq = np.max(curve_data.x) - min_freq = np.min(curve_data.x) - freq_increment = np.mean(np.diff(curve_data.x)) + freq_increment = np.mean(np.diff(fit_data.x_data)) fit_a = fit_data.fitval("a") fit_b = fit_data.fitval("b") fit_freq = fit_data.fitval("freq") fit_kappa = fit_data.fitval("kappa") - snr = abs(fit_a.n) / np.sqrt(abs(np.median(curve_data.y) - fit_b.n)) - fit_width_ratio = fit_kappa.n / (max_freq - min_freq) + snr = abs(fit_a.n) / np.sqrt(abs(np.median(fit_data.y_data) - fit_b.n)) + fit_width_ratio = fit_kappa.n / np.ptp(fit_data.x_data) criteria = [ - min_freq <= fit_freq.n <= max_freq, + fit_data.x_range[0] <= fit_freq.n <= fit_data.x_range[1], 1.5 * freq_increment < fit_kappa.n, fit_width_ratio < 0.25, fit_data.reduced_chisq < 3, diff --git a/qiskit_experiments/library/characterization/analysis/cr_hamiltonian_analysis.py b/qiskit_experiments/library/characterization/analysis/cr_hamiltonian_analysis.py index 6eafbdd95c..bcd5028f7d 100644 --- a/qiskit_experiments/library/characterization/analysis/cr_hamiltonian_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/cr_hamiltonian_analysis.py @@ -21,7 +21,6 @@ import qiskit_experiments.curve_analysis as curve import qiskit_experiments.data_processing as dp -from qiskit_experiments.database_service.device_component import Qubit from qiskit_experiments.framework import AnalysisResultData @@ -197,7 +196,7 @@ class CrossResonanceHamiltonianAnalysis(curve.CurveAnalysis): def _default_options(cls): """Return the default analysis options.""" default_options = super()._default_options() - default_options.curve_plotter.set_options( + 
default_options.curve_drawer.set_options( subplots=(3, 1), xlabel="Flat top width", ylabel=[ @@ -219,12 +218,15 @@ def _default_options(cls): return default_options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. @@ -234,9 +236,9 @@ def _generate_fit_guesses( guesses = defaultdict(list) for control in (0, 1): - x_data = self._data(series_name=f"x|c={control}") - y_data = self._data(series_name=f"y|c={control}") - z_data = self._data(series_name=f"z|c={control}") + x_data = curve_data.get_subset_of(f"x|c={control}") + y_data = curve_data.get_subset_of(f"y|c={control}") + z_data = curve_data.get_subset_of(f"z|c={control}") omega_xyz = [] for data in (x_data, y_data, z_data): @@ -288,20 +290,22 @@ def _generate_fit_guesses( return fit_options - def _evaluate_quality(self, fit_data: curve.FitData) -> Union[str, None]: - """Algorithmic criteria for whether the fit is good or bad. + def _create_analysis_results( + self, + fit_data: curve.FitData, + quality: str, + **metadata, + ) -> List[AnalysisResultData]: + """Create analysis results for important fit parameters. - A good fit has: - - If chi-squared value is less than 3. - """ - if fit_data.reduced_chisq < 3: - return "good" - - return "bad" + Args: + fit_data: Fit outcome. + quality: Quality of fit outcome. - def _extra_database_entry(self, fit_data: curve.FitData) -> List[AnalysisResultData]: - """Calculate Hamiltonian coefficients from fit values.""" - extra_entries = [] + Returns: + List of analysis result data. 
+ """ + outcomes = super()._create_analysis_results(fit_data, quality, **metadata) for control in ("z", "i"): for target in ("x", "y", "z"): @@ -313,14 +317,17 @@ def _extra_database_entry(self, fit_data: curve.FitData) -> List[AnalysisResultD else: coef_val = 0.5 * (p0_val + p1_val) / (2 * np.pi) - extra_entries.append( + outcomes.append( AnalysisResultData( name=f"omega_{control}{target}", value=coef_val, chisq=fit_data.reduced_chisq, - device_components=[Qubit(q) for q in self._physical_qubits], - extra={"unit": "Hz"}, + quality=quality, + extra={ + "unit": "Hz", + **metadata, + }, ) ) - return extra_entries + return outcomes diff --git a/qiskit_experiments/library/characterization/analysis/drag_analysis.py b/qiskit_experiments/library/characterization/analysis/drag_analysis.py index 82c79f865a..5f3effa7f8 100644 --- a/qiskit_experiments/library/characterization/analysis/drag_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/drag_analysis.py @@ -120,7 +120,7 @@ def _default_options(cls): descriptions of analysis options. """ default_options = super()._default_options() - default_options.curve_plotter.set_options( + default_options.curve_drawer.set_options( xlabel="Beta", ylabel="Signal (arb. units)", ) @@ -131,18 +131,21 @@ def _default_options(cls): return default_options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. """ # Use a fast Fourier transform to guess the frequency. 
- x_data = self._data("series-0").x + x_data = curve_data.get_subset_of("series-0").x min_beta, max_beta = min(x_data), max(x_data) # Use the highest-frequency curve to estimate the oscillation frequency. @@ -152,7 +155,7 @@ def _generate_fit_guesses( ("series-2", "reps2"), key=lambda x: self.options.fixed_parameters[x[1]], ) - curve_data = self._data(series_label) + curve_data = curve_data.get_subset_of(series_label) reps2 = self.options.fixed_parameters[reps_label] freqs_guess = curve.guess.frequency(curve_data.x, curve_data.y) / reps2 user_opt.p0.set_if_empty(freq=freqs_guess) @@ -161,14 +164,14 @@ def _generate_fit_guesses( span_x = max(x_data) - min(x_data) beta_bound = max(5 / user_opt.p0["freq"], span_x) - ptp_y = np.ptp(self._data().y) + ptp_y = np.ptp(curve_data.y) user_opt.bounds.set_if_empty( amp=(-2 * ptp_y, 0), freq=(0, np.inf), beta=(avg_x - beta_bound, avg_x + beta_bound), - base=(min(self._data().y) - ptp_y, max(self._data().y) + ptp_y), + base=(min(curve_data.y) - ptp_y, max(curve_data.y) + ptp_y), ) - base_guess = (max(self._data().y) - min(self._data().y)) / 2 + base_guess = (max(curve_data.y) - min(curve_data.y)) / 2 user_opt.p0.set_if_empty(base=(user_opt.p0["amp"] or base_guess)) # Drag curves can sometimes be very flat, i.e. averages of y-data @@ -185,32 +188,49 @@ def _generate_fit_guesses( return options - def _post_process_fit_result(self, fit_result: curve.FitData) -> curve.FitData: - r"""Post-process the fit result from a Drag analysis. + def _run_curve_fit( + self, + curve_data: curve.CurveData, + series: List[curve.SeriesDef], + ) -> Union[None, curve.FitData]: + r"""Perform curve fitting on given data collection and fit models. - The Drag analysis should return the beta value that is closest to zero. - Since the oscillating term is of the form + .. note:: - .. math:: + This class post-processes the fit result from a Drag analysis. 
- \cos(2 \pi\cdot {\rm reps}_i \cdot {\rm freq}\cdot [x - \beta]) + The Drag analysis should return the beta value that is closest to zero. + Since the oscillating term is of the form - There is a periodicity in beta. This post processing finds the beta that is - closest to zero by performing the minimization using the modulo function. + .. math:: - .. math:: + \cos(2 \pi\cdot {\rm reps}_i \cdot {\rm freq}\cdot [x - \beta]) - n_\text{min} = \min_{n}|\beta_\text{fit} + n / {\rm freq}| + There is a periodicity in beta. This post processing finds the beta that is + closest to zero by performing the minimization using the modulo function. - and assigning the new beta value to + .. math:: - .. math:: + n_\text{min} = \min_{n}|\beta_\text{fit} + n / {\rm freq}| + + and assigning the new beta value to + + .. math:: - \beta = \beta_\text{fit} + n_\text{min} / {\rm freq}. + \beta = \beta_\text{fit} + n_\text{min} / {\rm freq}. + + Args: + curve_data: Formatted data to fit. + series: A list of fit models. + + Returns: + The best fitting outcome with minimum reduced chi-squared value. """ + fit_result = super()._run_curve_fit(curve_data, series) beta = fit_result.popt[2] freq = fit_result.popt[1] fit_result.popt[2] = ((beta + 1 / freq / 2) % (1 / freq)) - 1 / freq / 2 + return fit_result def _evaluate_quality(self, fit_data: curve.FitData) -> Union[str, None]: diff --git a/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py b/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py index cd96d8d391..27713212ef 100644 --- a/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py @@ -95,7 +95,7 @@ def _default_options(cls): descriptions of analysis options. """ default_options = super()._default_options() - default_options.curve_plotter.set_options( + default_options.curve_drawer.set_options( xlabel="Delay", ylabel="Signal (arb. 
units)", xval_unit="s", @@ -105,17 +105,20 @@ def _default_options(cls): return default_options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. """ - max_abs_y, _ = curve.guess.max_height(self._data().y, absolute=True) + max_abs_y, _ = curve.guess.max_height(curve_data.y, absolute=True) user_opt.bounds.set_if_empty( amp=(-2 * max_abs_y, 2 * max_abs_y), @@ -127,7 +130,7 @@ def _generate_fit_guesses( # Default guess values freq_guesses, base_guesses = [], [] for series in ["X", "Y"]: - data = self._data(series) + data = curve_data.get_subset_of(series) freq_guesses.append(curve.guess.frequency(data.x, data.y)) base_guesses.append(curve.guess.constant_sinusoidal_offset(data.y)) @@ -135,8 +138,8 @@ def _generate_fit_guesses( user_opt.p0.set_if_empty(base=np.average(base_guesses)) # Guess the exponential decay by combining both curves - data_x = self._data("X") - data_y = self._data("Y") + data_x = curve_data.get_subset_of("X") + data_y = curve_data.get_subset_of("Y") decay_data = (data_x.y - user_opt.p0["base"]) ** 2 + (data_y.y - user_opt.p0["base"]) ** 2 user_opt.p0.set_if_empty( diff --git a/qiskit_experiments/library/characterization/analysis/t1_analysis.py b/qiskit_experiments/library/characterization/analysis/t1_analysis.py index a2cd46a1dd..8c2712995a 100644 --- a/qiskit_experiments/library/characterization/analysis/t1_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/t1_analysis.py @@ -30,7 +30,7 @@ class T1Analysis(curve.DecayAnalysis): def _default_options(cls) -> Options: """Default analysis options.""" options = 
super()._default_options() - options.curve_plotter.set_options( + options.curve_drawer.set_options( xlabel="Delay", ylabel="P(1)", xval_unit="s", diff --git a/qiskit_experiments/library/characterization/analysis/t2hahn_analysis.py b/qiskit_experiments/library/characterization/analysis/t2hahn_analysis.py index c769d88c67..dd03fad838 100644 --- a/qiskit_experiments/library/characterization/analysis/t2hahn_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/t2hahn_analysis.py @@ -34,7 +34,7 @@ class T2HahnAnalysis(curve.DecayAnalysis): def _default_options(cls) -> Options: """Default analysis options.""" options = super()._default_options() - options.curve_plotter.set_options( + options.curve_drawer.set_options( xlabel="Delay", ylabel="P(0)", xval_unit="s", diff --git a/qiskit_experiments/library/characterization/analysis/t2ramsey_analysis.py b/qiskit_experiments/library/characterization/analysis/t2ramsey_analysis.py index 3add85dfd5..8aaca649e1 100644 --- a/qiskit_experiments/library/characterization/analysis/t2ramsey_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/t2ramsey_analysis.py @@ -30,7 +30,7 @@ class T2RamseyAnalysis(curve.DumpedOscillationAnalysis): def _default_options(cls) -> Options: """Default analysis options.""" options = super()._default_options() - options.curve_plotter.set_options( + options.curve_drawer.set_options( xlabel="Delay", ylabel="P(0)", xval_unit="s", diff --git a/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py b/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py index e7e5fd7555..9ad7480b70 100644 --- a/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py +++ b/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py @@ -16,7 +16,7 @@ import numpy as np import qiskit_experiments.curve_analysis as curve -from qiskit_experiments.framework import AnalysisResultData +from 
qiskit_experiments.framework import AnalysisResultData, ExperimentData class InterleavedRBAnalysis(curve.CurveAnalysis): @@ -88,6 +88,10 @@ class InterleavedRBAnalysis(curve.CurveAnalysis): """ + def __init__(self): + super().__init__() + self._num_qubits = None + __series__ = [ curve.SeriesDef( name="Standard", @@ -119,12 +123,15 @@ def _default_options(cls): return default_options def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. @@ -140,11 +147,11 @@ def _generate_fit_guesses( a_guess = 1 - b_guess # for standard RB curve - std_curve = self._data(series_name="Standard") + std_curve = curve_data.get_subset_of("Standard") alpha_std = curve.guess.rb_decay(std_curve.x, std_curve.y, a=a_guess, b=b_guess) # for interleaved RB curve - int_curve = self._data(series_name="Interleaved") + int_curve = curve_data.get_subset_of("Interleaved") alpha_int = curve.guess.rb_decay(int_curve.x, int_curve.y, a=a_guess, b=b_guess) alpha_c = min(alpha_int / alpha_std, 1.0) @@ -158,23 +165,33 @@ def _generate_fit_guesses( return user_opt - def _format_data(self, data: curve.CurveData) -> curve.CurveData: - """Data format with averaging with sampling strategy.""" + def _format_data( + self, + curve_data: curve.CurveData, + ) -> curve.CurveData: + """Postprocessing for the processed dataset. + + Args: + curve_data: Processed dataset created from experiment results. + + Returns: + Formatted data. + """ # TODO Eventually move this to data processor, then create RB data processor. 
- # take average over the same x value by regenerating sigma from variance of y values - series, xdata, ydata, sigma, shots = curve.data_processing.multi_mean_xy_data( - series=data.data_index, - xdata=data.x, - ydata=data.y, - sigma=data.y_err, - shots=data.shots, + # take average over the same x value by keeping sigma + data_allocation, xdata, ydata, sigma, shots = curve.data_processing.multi_mean_xy_data( + series=curve_data.data_allocation, + xdata=curve_data.x, + ydata=curve_data.y, + sigma=curve_data.y_err, + shots=curve_data.shots, method="sample", ) # sort by x value in ascending order - series, xdata, ydata, sigma, shots = curve.data_processing.data_sort( - series=series, + data_allocation, xdata, ydata, sigma, shots = curve.data_processing.data_sort( + series=data_allocation, xdata=xdata, ydata=ydata, sigma=sigma, @@ -182,16 +199,31 @@ def _format_data(self, data: curve.CurveData) -> curve.CurveData: ) return curve.CurveData( - label="fit_ready", x=xdata, y=ydata, y_err=sigma, shots=shots, - data_index=series, + data_allocation=data_allocation, + labels=curve_data.labels, ) - def _extra_database_entry(self, fit_data: curve.FitData) -> List[AnalysisResultData]: - """Calculate EPC.""" + def _create_analysis_results( + self, + fit_data: curve.FitData, + quality: str, + **metadata, + ) -> List[AnalysisResultData]: + """Create analysis results for important fit parameters. + + Args: + fit_data: Fit outcome. + quality: Quality of fit outcome. + + Returns: + List of analysis result data. 
+ """ + outcomes = super()._create_analysis_results(fit_data, quality, **metadata) + nrb = 2**self._num_qubits scale = (nrb - 1) / nrb @@ -212,15 +244,34 @@ def _extra_database_entry(self, fit_data: curve.FitData) -> List[AnalysisResultD systematic_err_l = epc.n - systematic_err systematic_err_r = epc.n + systematic_err - extra_data = AnalysisResultData( - name="EPC", - value=epc, - chisq=fit_data.reduced_chisq, - quality=self._evaluate_quality(fit_data), - extra={ - "EPC_systematic_err": systematic_err, - "EPC_systematic_bounds": [max(systematic_err_l, 0), systematic_err_r], - }, + outcomes.append( + AnalysisResultData( + name="EPC", + value=epc, + chisq=fit_data.reduced_chisq, + quality=quality, + extra={ + "EPC_systematic_err": systematic_err, + "EPC_systematic_bounds": [max(systematic_err_l, 0), systematic_err_r], + **metadata, + }, + ) ) - return [extra_data] + return outcomes + + def _initialize( + self, + experiment_data: ExperimentData, + ): + """Initialize curve analysis with experiment data. + + This method is called ahead of other processing. + + Args: + experiment_data: Experiment data to analyze. + """ + super()._initialize(experiment_data) + + # Get qubit number + self._num_qubits = len(experiment_data.metadata["physical_qubits"]) diff --git a/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py b/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py index 43e0b66bdf..d54b135789 100644 --- a/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py +++ b/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py @@ -81,6 +81,7 @@ class RBAnalysis(curve.CurveAnalysis): def __init__(self): super().__init__() self._gate_counts_per_clifford = None + self._physical_qubits = None @classmethod def _default_options(cls): @@ -97,7 +98,7 @@ def _default_options(cls): 2Q RB is corected to exclude the deporalization of underlying 1Q channels. 
""" default_options = super()._default_options() - default_options.curve_plotter.set_options( + default_options.curve_drawer.set_options( xlabel="Clifford Length", ylabel="P(0)", ) @@ -118,25 +119,26 @@ def set_options(self, **fields): super().set_options(**fields) def _generate_fit_guesses( - self, user_opt: curve.FitOptions + self, + user_opt: curve.FitOptions, + curve_data: curve.CurveData, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: - """Compute the initial guesses. + """Create algorithmic guess with analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. + curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. """ - curve_data = self._data() - user_opt.bounds.set_if_empty( a=(0, 1), alpha=(0, 1), b=(0, 1), ) - b_guess = 1 / 2**self._num_qubits + b_guess = 1 / 2 ** len(self._physical_qubits) a_guess = 1 - b_guess alpha_guess = curve.guess.rb_decay(curve_data.x, curve_data.y, a=a_guess, b=b_guess) @@ -148,23 +150,33 @@ def _generate_fit_guesses( return user_opt - def _format_data(self, data: curve.CurveData) -> curve.CurveData: - """Data format with averaging with sampling strategy.""" + def _format_data( + self, + curve_data: curve.CurveData, + ) -> curve.CurveData: + """Postprocessing for the processed dataset. + + Args: + curve_data: Processed dataset created from experiment results. + + Returns: + Formatted data. + """ # TODO Eventually move this to data processor, then create RB data processor. 
- # take average over the same x value by regenerating sigma from variance of y values - series, xdata, ydata, sigma, shots = curve.data_processing.multi_mean_xy_data( - series=data.data_index, - xdata=data.x, - ydata=data.y, - sigma=data.y_err, - shots=data.shots, + # take average over the same x value by keeping sigma + data_allocation, xdata, ydata, sigma, shots = curve.data_processing.multi_mean_xy_data( + series=curve_data.data_allocation, + xdata=curve_data.x, + ydata=curve_data.y, + sigma=curve_data.y_err, + shots=curve_data.shots, method="sample", ) # sort by x value in ascending order - series, xdata, ydata, sigma, shots = curve.data_processing.data_sort( - series=series, + data_allocation, xdata, ydata, sigma, shots = curve.data_processing.data_sort( + series=data_allocation, xdata=xdata, ydata=ydata, sigma=sigma, @@ -172,48 +184,62 @@ def _format_data(self, data: curve.CurveData) -> curve.CurveData: ) return curve.CurveData( - label="fit_ready", x=xdata, y=ydata, y_err=sigma, shots=shots, - data_index=series, + data_allocation=data_allocation, + labels=curve_data.labels, ) - def _extra_database_entry(self, fit_data: curve.FitData) -> List[AnalysisResultData]: - """Calculate EPC.""" - extra_entries = [] - # pylint: disable=assignment-from-none - quality = self._evaluate_quality(fit_data) + def _create_analysis_results( + self, + fit_data: curve.FitData, + quality: str, + **metadata, + ) -> List[AnalysisResultData]: + """Create analysis results for important fit parameters. + + Args: + fit_data: Fit outcome. + quality: Quality of fit outcome. + + Returns: + List of analysis result data. 
+ """ + outcomes = super()._create_analysis_results(fit_data, quality, **metadata) + num_qubits = len(self._physical_qubits) # Calculate EPC alpha = fit_data.fitval("alpha") - scale = (2**self._num_qubits - 1) / (2**self._num_qubits) + scale = (2**num_qubits - 1) / (2**num_qubits) epc = scale * (1 - alpha) - extra_entries.append( + outcomes.append( AnalysisResultData( name="EPC", value=epc, chisq=fit_data.reduced_chisq, quality=quality, + extra=metadata, ) ) # Correction for 1Q depolarizing channel if EPGs are provided - if self.options.epg_1_qubit and self._num_qubits == 2: + if self.options.epg_1_qubit and num_qubits == 2: epc = _exclude_1q_error( epc=epc, qubits=self._physical_qubits, gate_counts_per_clifford=self._gate_counts_per_clifford, extra_analyses=self.options.epg_1_qubit, ) - extra_entries.append( + outcomes.append( AnalysisResultData( name="EPC_corrected", value=epc, chisq=fit_data.reduced_chisq, quality=quality, + extra=metadata, ) ) @@ -227,20 +253,33 @@ def _extra_database_entry(self, fit_data: curve.FitData) -> List[AnalysisResultD ) if epg_dict: for gate, epg_val in epg_dict.items(): - extra_entries.append( + outcomes.append( AnalysisResultData( name=f"EPG_{gate}", value=epg_val, chisq=fit_data.reduced_chisq, quality=quality, + extra=metadata, ) ) - return extra_entries + return outcomes + + def _initialize( + self, + experiment_data: ExperimentData, + ): + """Initialize curve analysis with experiment data. - def _run_analysis( - self, experiment_data: ExperimentData - ) -> Tuple[List[AnalysisResultData], List["pyplot.Figure"]]: + This method is called ahead of other processing. + + Args: + experiment_data: Experiment data to analyze. + + Raises: + AnalysisError: When circuit metadata for ops count is missing. + """ + super()._initialize(experiment_data) if self.options.gate_error_ratio is not None: # If gate error ratio is not False, EPG analysis is enabled. 
@@ -275,7 +314,8 @@ def _run_analysis( gate_error_ratio[gate] = _lookup_epg_ratio(gate, len(qinds)) self.set_options(gate_error_ratio=gate_error_ratio) - return super()._run_analysis(experiment_data) + # Get qubit number + self._physical_qubits = experiment_data.metadata["physical_qubits"] def _lookup_epg_ratio(gate: str, n_qubits: int) -> Union[None, int]: diff --git a/releasenotes/notes/cleanup-curve-analysis-96d7ff706cae5b4e.yaml b/releasenotes/notes/cleanup-curve-analysis-96d7ff706cae5b4e.yaml new file mode 100644 index 0000000000..1345436d86 --- /dev/null +++ b/releasenotes/notes/cleanup-curve-analysis-96d7ff706cae5b4e.yaml @@ -0,0 +1,30 @@ +--- +upgrade: + - | + :class:`.BaseCurveAnalysis` class has been added as a superclass of :class:`.CurveAnalysis`. + New base class doesn't define the :meth:`_run_analysis` abstract method + and it cannot conduct analysis by itself, however it defines several subroutines + that can be combined to build a custom fitting process in the subclass. + This allows more flexibility to write custom curve analysis by + directly inheriting from the new base class. See :class:`.BaseCurveAnalysis` for details. + See also qiskit-experiments/#737 for discussion. + - | + The method :meth:`CurveAnalysis._generate_fit_guesses` has been upgraded with + new method signature. Now this method is called with ``curve_data`` argument + that provides dataset which is used for curve fitting. + If you define custom :class:`.CurveAnalysis` subclass in your codestack, + you may need to upgrade the method. See :class:`.BaseCurveAnalysis` for details. + - | + Arguments of :class:`.FitData` have been updated to take ``x_data`` and ``y_data`` + instead of ``x_range`` and ``y_range``. +deprecations: + - | + Several protected methods of :class:`.CurveAnalysis` intended to be overridden + or directly used by subclasses have been deprecated. + :meth:`CurveAnalysis._data` has been deprecated without alternative method + to make :class:`.CurveAnalysis` state cleaner.
Now relevant curve analysis methods + requiring curve data are called with the ``curve_data`` argument. + :meth:`CurveAnalysis._extra_database_entry` has also been deprecated. + This method becomes a part of :meth:`CurveAnalysis._create_analysis_results`. + Analysis class authors can override this method to inject code that creates + custom analysis results. diff --git a/test/curve_analysis/test_curve_fit.py b/test/curve_analysis/test_baseclass.py similarity index 79% rename from test/curve_analysis/test_curve_fit.py rename to test/curve_analysis/test_baseclass.py index bf54ca59af..55d2b1610a 100644 --- a/test/curve_analysis/test_curve_fit.py +++ b/test/curve_analysis/test_baseclass.py @@ -28,7 +28,7 @@ ParameterRepr, FitOptions, ) -from qiskit_experiments.curve_analysis.data_processing import probability +from qiskit_experiments.data_processing import DataProcessor, Probability from qiskit_experiments.exceptions import AnalysisError from qiskit_experiments.framework import ExperimentData @@ -76,85 +76,43 @@ def _default_options(cls): return TestAnalysis() -class TestFitData(QiskitExperimentsTestCase): - """Unittest for fit data dataclass.""" - - def test_get_value(self): - """Get fit value from fit data object.""" - pcov = np.diag(np.ones(3)) - popt = np.asarray([1.0, 2.0, 3.0]) - fit_params = correlated_values(popt, pcov) - - data = FitData( - popt=fit_params, - popt_keys=["a", "b", "c"], - pcov=pcov, - reduced_chisq=0.0, - dof=0, - x_range=(0, 0), - y_range=(0, 0), - ) - - a_val = data.fitval("a") - self.assertEqual(a_val, fit_params[0]) - - b_val = data.fitval("b") - self.assertEqual(b_val, fit_params[1]) - - c_val = data.fitval("c") - self.assertEqual(c_val, fit_params[2]) - - class TestCurveAnalysisUnit(QiskitExperimentsTestCase): """Unittest for curve fit analysis.""" - def setUp(self): - super().setUp() - self.xvalues = np.linspace(1.0, 5.0, 10) - - # Description of test setting - # - # - This model contains three curves, namely, curve1, curve2, curve3 - #
Each curve can be represented by the same function - # - Parameter amp and baseline are shared among all curves - # - Each curve has unique lamb - # - In total 5 parameters in the fit, namely, p0, p1, p2, p3 - # - self.analysis = create_new_analysis( - series=[ - SeriesDef( - name="curve1", - fit_func=lambda x, par0, par1, par2, par3, par4: fit_function.exponential_decay( - x, amp=par0, lamb=par1, baseline=par4 - ), - filter_kwargs={"type": 1, "valid": True}, - model_description=r"p_0 * \exp(p_1 x) + p4", + class TestAnalysis(CurveAnalysis): + """Fake analysis class for unittest.""" + + __series__ = [ + SeriesDef( + name="curve1", + fit_func=lambda x, par0, par1, par2, par3, par4: fit_function.exponential_decay( + x, amp=par0, lamb=par1, baseline=par4 ), - SeriesDef( - name="curve2", - fit_func=lambda x, par0, par1, par2, par3, par4: fit_function.exponential_decay( - x, amp=par0, lamb=par2, baseline=par4 - ), - filter_kwargs={"type": 2, "valid": True}, - model_description=r"p_0 * \exp(p_2 x) + p4", + filter_kwargs={"op1": 1, "op2": True}, + model_description=r"p_0 * \exp(p_1 x) + p4", + ), + SeriesDef( + name="curve2", + fit_func=lambda x, par0, par1, par2, par3, par4: fit_function.exponential_decay( + x, amp=par0, lamb=par2, baseline=par4 ), - SeriesDef( - name="curve3", - fit_func=lambda x, par0, par1, par2, par3, par4: fit_function.exponential_decay( - x, amp=par0, lamb=par3, baseline=par4 - ), - filter_kwargs={"type": 3, "valid": True}, - model_description=r"p_0 * \exp(p_3 x) + p4", + filter_kwargs={"op1": 2, "op2": True}, + model_description=r"p_0 * \exp(p_2 x) + p4", + ), + SeriesDef( + name="curve3", + fit_func=lambda x, par0, par1, par2, par3, par4: fit_function.exponential_decay( + x, amp=par0, lamb=par3, baseline=par4 ), - ], - ) - self.err_decimal = 3 + filter_kwargs={"op1": 3, "op2": True}, + model_description=r"p_0 * \exp(p_3 x) + p4", + ), + ] def test_parsed_fit_params(self): """Test parsed fit params.""" - self.assertSetEqual( - 
set(self.analysis._fit_params()), {"par0", "par1", "par2", "par3", "par4"} - ) + analysis = self.TestAnalysis() + self.assertSetEqual(set(analysis.parameters), {"par0", "par1", "par2", "par3", "par4"}) def test_cannot_create_invalid_series_fit(self): """Test we cannot create invalid analysis instance.""" @@ -176,100 +134,134 @@ def test_cannot_create_invalid_series_fit(self): def test_data_extraction(self): """Test data extraction method.""" - self.analysis.set_options(x_key="xval") + xvalues = np.linspace(1.0, 5.0, 10) + + analysis = self.TestAnalysis() + analysis.set_options(data_processor=DataProcessor("counts", [Probability("1")])) # data to analyze test_data0 = simulate_output_data( func=fit_function.exponential_decay, - xvals=self.xvalues, + xvals=xvalues, param_dict={"amp": 1.0}, - type=1, - valid=True, + op1=1, + op2=True, + ) + + curve_data = analysis._run_data_processing( + raw_data=test_data0.data(), + series=analysis.__series__, + ) + + # check x values + ref_x = xvalues + np.testing.assert_array_almost_equal(curve_data.x, ref_x) + + # check y values + ref_y = fit_function.exponential_decay(xvalues, amp=1.0) + np.testing.assert_array_almost_equal(curve_data.y, ref_y, decimal=3) + + # check data allocation + ref_alloc = np.zeros(10, dtype=int) + self.assertListEqual(list(curve_data.data_allocation), list(ref_alloc)) + + def test_data_extraction_with_subset(self): + """Test data extraction method with multiple series.""" + xvalues = np.linspace(1.0, 5.0, 10) + + analysis = self.TestAnalysis() + analysis.set_options(data_processor=DataProcessor("counts", [Probability("1")])) + + # data to analyze + test_data0 = simulate_output_data( + func=fit_function.exponential_decay, + xvals=xvalues, + param_dict={"amp": 1.0}, + op1=1, + op2=True, ) - # fake data test_data1 = simulate_output_data( func=fit_function.exponential_decay, - xvals=self.xvalues, + xvals=xvalues, param_dict={"amp": 0.5}, - type=2, - valid=False, + op1=2, + op2=True, ) - # merge two experiment 
data - for datum in test_data1.data(): - test_data0.add_data(datum) + # get subset + curve_data_of_1 = analysis._run_data_processing( + raw_data=test_data0.data() + test_data1.data(), + series=analysis.__series__, + ).get_subset_of("curve1") - self.analysis._extract_curves( - experiment_data=test_data0, data_processor=probability(outcome="1") - ) + # check x values + ref_x = xvalues + np.testing.assert_array_almost_equal(curve_data_of_1.x, ref_x) - raw_data = self.analysis._data(label="raw_data") + # check y values + ref_y = fit_function.exponential_decay(xvalues, amp=1.0) + np.testing.assert_array_almost_equal(curve_data_of_1.y, ref_y, decimal=3) - xdata = raw_data.x - ydata = raw_data.y - sigma = raw_data.y_err - d_index = raw_data.data_index + # check data allocation + ref_alloc = np.zeros(10, dtype=int) + self.assertListEqual(list(curve_data_of_1.data_allocation), list(ref_alloc)) - # check if the module filter off data: valid=False - self.assertEqual(len(xdata), 20) + def test_create_results(self): + """Test creating analysis results.""" + analysis = self.TestAnalysis() + analysis.set_options( + result_parameters=["par0", ParameterRepr("par1", "Param1", "SomeUnit")], + ) - # check x values - ref_x = np.concatenate((self.xvalues, self.xvalues)) - np.testing.assert_array_almost_equal(xdata, ref_x) + pcov = np.diag(np.ones(5)) + popt = np.asarray([1.0, 2.0, 3.0, 4.0, 5.0]) + fit_params = correlated_values(popt, pcov) - # check y values - ref_y = np.concatenate( - ( - fit_function.exponential_decay(self.xvalues, amp=1.0), - fit_function.exponential_decay(self.xvalues, amp=0.5), - ) + fit_data = FitData( + popt=fit_params, + popt_keys=["par0", "par1", "par2", "par3", "par4", "par5"], + pcov=pcov, + reduced_chisq=2.0, + dof=0, + x_data=np.arange(5), + y_data=np.arange(5), ) - np.testing.assert_array_almost_equal(ydata, ref_y, decimal=self.err_decimal) - # check series - ref_series = np.concatenate((np.zeros(10, dtype=int), -1 * np.ones(10, dtype=int))) - 
self.assertListEqual(list(d_index), list(ref_series)) + outcomes = analysis._create_analysis_results(fit_data, quality="good", test_val=1) - # check y errors - ref_yerr = ref_y * (1 - ref_y) / 100000 - np.testing.assert_array_almost_equal(sigma, ref_yerr, decimal=self.err_decimal) + # entry name + self.assertEqual(outcomes[0].name, "@Parameters_TestAnalysis") + self.assertEqual(outcomes[1].name, "par0") + self.assertEqual(outcomes[2].name, "Param1") - def test_get_subset(self): - """Test that get subset data from full data array.""" - # data to analyze - fake_data = [ - {"data": 1, "metadata": {"xval": 1, "type": 1, "valid": True}}, - {"data": 2, "metadata": {"xval": 2, "type": 2, "valid": True}}, - {"data": 3, "metadata": {"xval": 3, "type": 1, "valid": True}}, - {"data": 4, "metadata": {"xval": 4, "type": 3, "valid": True}}, - {"data": 5, "metadata": {"xval": 5, "type": 3, "valid": True}}, - {"data": 6, "metadata": {"xval": 6, "type": 4, "valid": True}}, # this if fake - ] - expdata = ExperimentData(experiment=FakeExperiment()) - for datum in fake_data: - expdata.add_data(datum) - - def _processor(datum): - return datum["data"], datum["data"] * 2 - - self.analysis.set_options(x_key="xval") - self.analysis._extract_curves(expdata, data_processor=_processor) - - filt_data = self.analysis._data(series_name="curve1") - np.testing.assert_array_equal(filt_data.x, np.asarray([1, 3], dtype=float)) - np.testing.assert_array_equal(filt_data.y, np.asarray([1, 3], dtype=float)) - np.testing.assert_array_equal(filt_data.y_err, np.asarray([2, 6], dtype=float)) - - filt_data = self.analysis._data(series_name="curve2") - np.testing.assert_array_equal(filt_data.x, np.asarray([2], dtype=float)) - np.testing.assert_array_equal(filt_data.y, np.asarray([2], dtype=float)) - np.testing.assert_array_equal(filt_data.y_err, np.asarray([4], dtype=float)) - - filt_data = self.analysis._data(series_name="curve3") - np.testing.assert_array_equal(filt_data.x, np.asarray([4, 5], dtype=float)) - 
np.testing.assert_array_equal(filt_data.y, np.asarray([4, 5], dtype=float)) - np.testing.assert_array_equal(filt_data.y_err, np.asarray([8, 10], dtype=float)) + # entry value + self.assertEqual(outcomes[1].value, fit_params[0]) + self.assertEqual(outcomes[2].value, fit_params[1]) + + # other metadata + self.assertEqual(outcomes[2].quality, "good") + self.assertEqual(outcomes[2].chisq, 2.0) + ref_meta = { + "test_val": 1, + "unit": "SomeUnit", + } + self.assertDictEqual(outcomes[2].extra, ref_meta) + + def test_invalid_options(self): + """Test setting invalid options.""" + analysis = self.TestAnalysis() + + class InvalidClass: + """Dummy class.""" + + pass + + with self.assertRaises(TypeError): + analysis.set_options(data_processor=InvalidClass()) + + with self.assertRaises(TypeError): + analysis.set_options(curve_drawer=InvalidClass()) class TestCurveAnalysisIntegration(QiskitExperimentsTestCase):