Source code for qiskit_machine_learning.algorithms.classifiers.pegasos_qsvc

# This code is part of a Qiskit project.
#
# (C) Copyright IBM 2022, 2023.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Pegasos Quantum Support Vector Classifier."""
from __future__ import annotations

import logging
from datetime import datetime
from typing import Dict

import numpy as np
from qiskit_algorithms.utils import algorithm_globals
from sklearn.base import ClassifierMixin

from ...algorithms.serializable_model import SerializableModelMixin
from ...exceptions import QiskitMachineLearningError
from ...kernels import BaseKernel, FidelityQuantumKernel


logger = logging.getLogger(__name__)


[docs]class PegasosQSVC(ClassifierMixin, SerializableModelMixin):
    r"""
    Implements Pegasos Quantum Support Vector Classifier algorithm. The algorithm has been
    developed in [1] and includes methods ``fit``, ``predict`` and ``decision_function`` following
    the signatures
    of `sklearn.svm.SVC <https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html>`_.
    This implementation is adapted to work with quantum kernels.

    **Example**

    .. code-block:: python

        quantum_kernel = FidelityQuantumKernel()

        pegasos_qsvc = PegasosQSVC(quantum_kernel=quantum_kernel)
        pegasos_qsvc.fit(sample_train, label_train)
        pegasos_qsvc.predict(sample_test)

    **References**
        [1]: Shalev-Shwartz et al., Pegasos: Primal Estimated sub-GrAdient SOlver for SVM.
            `Pegasos for SVM <https://home.ttic.edu/~nati/Publications/PegasosMPB.pdf>`_

    """

    FITTED = 0
    UNFITTED = 1

    # pylint: disable=invalid-name
    def __init__(
        self,
        quantum_kernel: BaseKernel | None = None,
        C: float = 1.0,
        num_steps: int = 1000,
        precomputed: bool = False,
        seed: int | None = None,
    ) -> None:
        """
        Args:
            quantum_kernel: A quantum kernel to be used for classification.
                Has to be ``None`` when a precomputed kernel is used. If None,
                and ``precomputed`` is ``False``, the quantum kernel will default to
                :class:`~qiskit_machine_learning.kernels.FidelityQuantumKernel`.
            C: Positive regularization parameter. The strength of the regularization is inversely
                proportional to C. Smaller ``C`` induce smaller weights which generally helps
                preventing overfitting. However, due to the nature of this algorithm, some of the
                computation steps become trivial for larger ``C``. Thus, larger ``C`` improve
                the performance of the algorithm drastically. If the data is linearly separable
                in feature space, ``C`` should be chosen to be large. If the separation is not
                perfect, ``C`` should be chosen smaller to prevent overfitting.
            num_steps: The number of steps in the Pegasos algorithm. There is no early stopping
                criterion. The algorithm iterates over all steps.
            precomputed: A boolean flag indicating whether a precomputed kernel is used. Set it to
                ``True`` in case of precomputed kernel.
            seed: A seed for the random number generator.

        Raises:
            ValueError:
                - if ``quantum_kernel`` is passed and ``precomputed`` is set to ``True``. To use
                a precomputed kernel, ``quantum_kernel`` has to be of the ``None`` type.
                - if C is not a positive number.
        """

        if precomputed:
            if quantum_kernel is not None:
                raise ValueError("'quantum_kernel' has to be None to use a precomputed kernel")
        else:
            if quantum_kernel is None:
                quantum_kernel = FidelityQuantumKernel()

        self._quantum_kernel = quantum_kernel
        self._precomputed = precomputed
        self._num_steps = num_steps
        if seed is not None:
            algorithm_globals.random_seed = seed

        if C > 0:
            self.C = C
        else:
            raise ValueError(f"C has to be a positive number, found {C}.")

        # these are the parameters being fit and are needed for prediction
        self._alphas: Dict[int, int] | None = None
        self._x_train: np.ndarray | None = None
        self._n_samples: int | None = None
        self._y_train: np.ndarray | None = None
        self._label_map: Dict[int, int] | None = None
        self._label_pos: int | None = None
        self._label_neg: int | None = None

        # added to all kernel values to include an implicit bias to the hyperplane
        self._kernel_offset = 1

        # for compatibility with the base SVC class. Set as unfitted.
        self.fit_status_ = PegasosQSVC.UNFITTED

    # pylint: disable=invalid-name
[docs]    def fit(
        self, X: np.ndarray, y: np.ndarray, sample_weight: np.ndarray | None = None
    ) -> "PegasosQSVC":
        """Fit the model according to the given training data.

        Args:
            X: Train features. For a callable kernel (an instance of
               :class:`~qiskit_machine_learning.kernels.BaseKernel`) the shape
               should be ``(n_samples, n_features)``, for a precomputed kernel the shape should be
               ``(n_samples, n_samples)``.
            y: shape (n_samples), train labels . Must not contain more than two unique labels.
            sample_weight: this parameter is not supported, passing a value raises an error.

        Returns:
            ``self``, Fitted estimator.

        Raises:
            ValueError:
                - X and/or y have the wrong shape.
                - X and y have incompatible dimensions.
                - y includes more than two unique labels.
                - Pre-computed kernel matrix has the wrong shape and/or dimension.

            NotImplementedError:
                - when a sample_weight which is not None is passed.
        """
        # check whether the data have the right format
        if np.ndim(X) != 2:
            raise ValueError("X has to be a 2D array")
        if np.ndim(y) != 1:
            raise ValueError("y has to be a 1D array")
        if len(np.unique(y)) != 2:
            raise ValueError("Only binary classification is supported")
        if X.shape[0] != y.shape[0]:
            raise ValueError("'X' and 'y' have to contain the same number of samples")
        if self._precomputed and X.shape[0] != X.shape[1]:
            raise ValueError(
                "For a precomputed kernel, X should be in shape (n_samples, n_samples)"
            )
        if sample_weight is not None:
            raise NotImplementedError(
                "Parameter 'sample_weight' is not supported. All samples have to be weighed equally"
            )
        # reset the fit state
        self.fit_status_ = PegasosQSVC.UNFITTED

        # the algorithm works with labels in {+1, -1}
        self._label_pos = np.unique(y)[0]
        self._label_neg = np.unique(y)[1]
        self._label_map = {self._label_pos: +1, self._label_neg: -1}

        # the training data are later needed for prediction
        self._x_train = X
        self._y_train = y
        self._n_samples = X.shape[0]

        # empty dictionary to represent sparse array
        self._alphas = {}

        t_0 = datetime.now()
        # training loop
        for step in range(1, self._num_steps + 1):
            # for every step, a random index (determining a random datum) is fixed
            i = algorithm_globals.random.integers(0, len(y))

            value = self._compute_weighted_kernel_sum(i, X, training=True)

            if (self._label_map[y[i]] * self.C / step) * value < 1:
                # only way for a component of alpha to become non zero
                self._alphas[i] = self._alphas.get(i, 0) + 1

        self.fit_status_ = PegasosQSVC.FITTED

        logger.debug("fit completed after %s", str(datetime.now() - t_0)[:-7])

        return self

    # pylint: disable=invalid-name
[docs]    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Perform classification on samples in X.

        Args:
            X: Features. For a callable kernel (an instance of
               :class:`~qiskit_machine_learning.kernels.BaseKernel`) the shape
               should be ``(m_samples, n_features)``, for a precomputed kernel the shape should be
               ``(m_samples, n_samples)``. Where ``m`` denotes the set to be predicted and ``n`` the
               size of the training set. In that case, the kernel values in X have to be calculated
               with respect to the elements of the set to be predicted and the training set.

        Returns:
            An array of the shape (n_samples), the predicted class labels for samples in X.

        Raises:
            QiskitMachineLearningError:
                - predict is called before the model has been fit.
            ValueError:
                - Pre-computed kernel matrix has the wrong shape and/or dimension.
        """

        t_0 = datetime.now()
        values = self.decision_function(X)
        y = np.array([self._label_pos if val > 0 else self._label_neg for val in values])
        logger.debug("prediction completed after %s", str(datetime.now() - t_0)[:-7])

        return y

[docs]    def decision_function(self, X: np.ndarray) -> np.ndarray:
        """
        Evaluate the decision function for the samples in X.

        Args:
            X: Features. For a callable kernel (an instance of
               :class:`~qiskit_machine_learning.kernels.BaseKernel`) the shape
               should be ``(m_samples, n_features)``, for a precomputed kernel the shape should be
               ``(m_samples, n_samples)``. Where ``m`` denotes the set to be predicted and ``n`` the
               size of the training set. In that case, the kernel values in X have to be calculated
               with respect to the elements of the set to be predicted and the training set.

        Returns:
            An array of the shape (n_samples), the decision function of the sample.

        Raises:
            QiskitMachineLearningError:
                - the method is called before the model has been fit.
            ValueError:
                - Pre-computed kernel matrix has the wrong shape and/or dimension.
        """
        if self.fit_status_ == PegasosQSVC.UNFITTED:
            raise QiskitMachineLearningError("The PegasosQSVC has to be fit first")
        if np.ndim(X) != 2:
            raise ValueError("X has to be a 2D array")
        if self._precomputed and self._n_samples != X.shape[1]:
            raise ValueError(
                "For a precomputed kernel, X should be in shape (m_samples, n_samples)"
            )

        values = np.zeros(X.shape[0])
        for i in range(X.shape[0]):
            values[i] = self._compute_weighted_kernel_sum(i, X, training=False)

        return values

    def _compute_weighted_kernel_sum(self, index: int, X: np.ndarray, training: bool) -> float:
        """Helper function to compute the weighted sum over support vectors used for both training
        and prediction with the Pegasos algorithm.

        Args:
            index: fixed index distinguishing some datum
            X: Features
            training: flag indicating whether the loop is used within training or prediction

        Returns:
            Weighted sum of kernel evaluations employed in the Pegasos algorithm
        """
        # non-zero indices corresponding to the support vectors
        support_indices = list(self._alphas.keys())

        # for training
        if training:
            # support vectors
            x_supp = X[support_indices]
        # for prediction
        else:
            x_supp = self._x_train[support_indices]
        if not self._precomputed:
            # evaluate kernel function only for the fixed datum and the support vectors
            kernel = self._quantum_kernel.evaluate(X[index], x_supp) + self._kernel_offset
        else:
            kernel = X[index, support_indices]

        # map the training labels of the support vectors to {-1,1}
        y = np.array(list(map(self._label_map.get, self._y_train[support_indices])))
        # weights for the support vectors
        alphas = np.array(list(self._alphas.values()))
        # this value corresponds to a sum of kernel values weighted by their labels and alphas
        value = np.sum(alphas * y * kernel)

        return value

    @property
    def quantum_kernel(self) -> BaseKernel:
        """Returns quantum kernel"""
        return self._quantum_kernel

    @quantum_kernel.setter
    def quantum_kernel(self, quantum_kernel: BaseKernel):
        """
        Sets quantum kernel. If previously a precomputed kernel was set, it is reset to ``False``.
        """

        self._quantum_kernel = quantum_kernel
        # quantum kernel is set, so we assume the kernel is not precomputed
        self._precomputed = False

        # reset training status
        self._reset_state()

    @property
    def num_steps(self) -> int:
        """Returns number of steps in the Pegasos algorithm."""
        return self._num_steps

    @num_steps.setter
    def num_steps(self, num_steps: int):
        """Sets the number of steps to be used in the Pegasos algorithm."""
        self._num_steps = num_steps

        # reset training status
        self._reset_state()

    @property
    def precomputed(self) -> bool:
        """Returns a boolean flag indicating whether a precomputed kernel is used."""
        return self._precomputed

    @precomputed.setter
    def precomputed(self, precomputed: bool):
        """Sets the pre-computed kernel flag. If ``True`` is passed then the previous kernel is
        cleared. If ``False`` is passed then a new instance of
        :class:`~qiskit_machine_learning.kernels.FidelityQuantumKernel` is created."""
        self._precomputed = precomputed
        if precomputed:
            # remove the kernel, a precomputed will
            self._quantum_kernel = None
        else:
            # re-create a new default quantum kernel
            self._quantum_kernel = FidelityQuantumKernel()

        # reset training status
        self._reset_state()

    def _reset_state(self):
        """Resets internal data structures used in training."""
        self.fit_status_ = PegasosQSVC.UNFITTED
        self._alphas = None
        self._x_train = None
        self._n_samples = None
        self._y_train = None
        self._label_map = None
        self._label_pos = None
        self._label_neg = None