# This code is part of Qiskit.
#
# (C) Copyright IBM 2018, 2022.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""
ad hoc dataset
"""

from typing import Union, Tuple
import itertools as it
from functools import reduce
import numpy as np
from qiskit.utils import algorithm_globals, optionals

from qiskit_machine_learning.datasets.dataset_helper import (
    features_and_labels_transform,
)


def ad_hoc_data(
    training_size: int,
    test_size: int,
    n: int,
    gap: int,
    plot_data: bool = False,
    one_hot: bool = True,
    include_sample_total: bool = False,
) -> Union[
    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
    Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray],
]:
    r"""Generates a toy dataset that can be fully separated with
    ``qiskit.circuit.library.ZZFeatureMap`` according to the procedure
    outlined in [1]. To construct the dataset, we first sample uniformly
    distributed vectors :math:`\vec{x} \in (0, 2\pi]^{n}` and apply the
    feature map

    .. math::
        |\Phi(\vec{x})\rangle = U_{\Phi(\vec{x})} H^{\otimes n}
        U_{\Phi(\vec{x})} H^{\otimes n} |0^{\otimes n}\rangle,

    where

    .. math::
        U_{\Phi(\vec{x})} = \exp \left( i \sum_{S \subseteq [n]}
        \phi_S(\vec{x}) \prod_{i \in S} Z_i \right)

    and

    .. math::
        \begin{cases}
            \phi_{\{i, j\}} = (\pi - x_i)(\pi - x_j) \\
            \phi_{\{i\}} = x_i
        \end{cases}

    We then attribute labels to the vectors according to the rule

    .. math::
        m(\vec{x}) = \begin{cases}
            1 & \langle \Phi(\vec{x}) | V^\dagger \prod_i Z_i V
                | \Phi(\vec{x}) \rangle > \Delta \\
            -1 & \langle \Phi(\vec{x}) | V^\dagger \prod_i Z_i V
                | \Phi(\vec{x}) \rangle < -\Delta
        \end{cases}

    where :math:`\Delta` is the separation gap, and
    :math:`V \in \mathrm{SU}(4)` is a random unitary. The current
    implementation works only with ``n = 2`` or ``n = 3``.

    **References:**

    [1] Havlíček V, Córcoles AD, Temme K, Harrow AW, Kandala A, Chow JM,
    Gambetta JM. Supervised learning with quantum-enhanced feature spaces.
    Nature. 2019 Mar;567(7747):209-12.
    `arXiv:1804.11326 <https://arxiv.org/abs/1804.11326>`_

    Args:
        training_size: the number of training samples.
        test_size: the number of testing samples.
        n: number of qubits (dimension of the feature space). Must be 2 or 3.
        gap: separation gap (:math:`\Delta`).
        plot_data: whether to plot the data. Requires matplotlib.
        one_hot: if True, return the data in one-hot format.
        include_sample_total: if True, return all points in the uniform
            grid in addition to training and testing samples.

    Returns:
        Training and testing samples.

    Raises:
        ValueError: if n is not 2 or 3.
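
    An illustrative usage sketch (the split sizes and the ``gap`` value
    below are arbitrary demonstration choices; ``gap`` is only used as a
    threshold, so a float works despite the ``int`` annotation):

    .. code-block:: python

        train_features, train_labels, test_features, test_labels = ad_hoc_data(
            training_size=20, test_size=5, n=2, gap=0.3
        )
        # Both classes are stacked, so with the default one_hot=True:
        # train_features.shape == (40, 2) and train_labels.shape == (40, 2)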
""" class_labels = [r"A", r"B"] count = 0 if n == 2: count = 100 elif n == 3: count = 20 # coarseness of data separation else: raise ValueError(f"Supported values of 'n' are 2 and 3 only, but {n} is provided.") # Define auxiliary matrices and initial state z = np.diag([1, -1]) i_2 = np.eye(2) h_2 = np.array([[1, 1], [1, -1]]) / np.sqrt(2) h_n = reduce(np.kron, [h_2] * n) psi_0 = np.ones(2**n) / np.sqrt(2**n) # Generate Z matrices acting on each qubits z_i = np.array([reduce(np.kron, [i_2] * i + [z] + [i_2] * (n - i - 1)) for i in range(n)]) # Construct the parity operator bitstrings = ["".join(bstring) for bstring in it.product(*[["0", "1"]] * n)] if n == 2: bitstring_parity = [bstr.count("1") % 2 for bstr in bitstrings] d_m = np.diag((-1) ** np.array(bitstring_parity)) elif n == 3: bitstring_majority = [0 if bstr.count("0") > 1 else 1 for bstr in bitstrings] d_m = np.diag((-1) ** np.array(bitstring_majority)) # Generate a random unitary operator by collecting eigenvectors of a # random hermitian operator basis = algorithm_globals.random.random( (2**n, 2**n) ) + 1j * algorithm_globals.random.random((2**n, 2**n)) basis = np.array(basis).conj().T @ np.array(basis) eigvals, eigvecs = np.linalg.eig(basis) idx = eigvals.argsort()[::-1] eigvecs = eigvecs[:, idx] m_m = eigvecs.conj().T @ d_m @ eigvecs # Generate a grid of points in the feature space and compute the # expectation value of the parity xvals = np.linspace(0, 2 * np.pi, count, endpoint=False) ind_pairs = list(it.combinations(range(n), 2)) _sample_total = [] for x in it.product(*[xvals] * n): x = np.array(x) phi = np.sum(x[:, None, None] * z_i, axis=0) phi += sum(((np.pi - x[i1]) * (np.pi - x[i2]) * z_i[i1] @ z_i[i2] for i1, i2 in ind_pairs)) # u_u was actually scipy.linalg.expm(1j * phi), but this method is # faster because phi is always a diagonal matrix. # We first extract the diagonal elements, then do exponentiation, then # construct a diagonal matrix from them. u_u = np.diag(np.exp(1j * np.diag(phi))) psi = u_u @ h_n @ u_u @ psi_0 exp_val = np.real(psi.conj().T @ m_m @ psi) if np.abs(exp_val) > gap: _sample_total.append(np.sign(exp_val)) else: _sample_total.append(0) sample_total = np.array(_sample_total).reshape(*[count] * n) # Extract training and testing samples from grid x_sample, y_sample = _sample_ad_hoc_data(sample_total, xvals, training_size + test_size, n) if plot_data: _plot_ad_hoc_data(x_sample, y_sample, training_size) training_input = { key: (x_sample[y_sample == k, :])[:training_size] for k, key in enumerate(class_labels) } test_input = { key: (x_sample[y_sample == k, :])[training_size : (training_size + test_size)] for k, key in enumerate(class_labels) } training_feature_array, training_label_array = features_and_labels_transform( training_input, class_labels, one_hot ) test_feature_array, test_label_array = features_and_labels_transform( test_input, class_labels, one_hot ) if include_sample_total: return ( training_feature_array, training_label_array, test_feature_array, test_label_array, sample_total, ) else: return ( training_feature_array, training_label_array, test_feature_array, test_label_array, )
def _sample_ad_hoc_data(sample_total, xvals, num_samples, n):
    count = sample_total.shape[0]
    sample_a, sample_b = [], []
    for i, sample_list in enumerate([sample_a, sample_b]):
        label = 1 if i == 0 else -1
        while len(sample_list) < num_samples:
            draws = tuple(algorithm_globals.random.choice(count) for _ in range(n))
            if sample_total[draws] == label:
                sample_list.append([xvals[d] for d in draws])

    labels = np.array([0] * num_samples + [1] * num_samples)
    samples = [sample_a, sample_b]
    samples = np.reshape(samples, (2 * num_samples, n))
    return samples, labels


@optionals.HAS_MATPLOTLIB.require_in_call
def _plot_ad_hoc_data(x_total, y_total, training_size):
    import matplotlib.pyplot as plt

    n = x_total.shape[1]
    fig = plt.figure()
    projection = "3d" if n == 3 else None
    ax1 = fig.add_subplot(1, 1, 1, projection=projection)
    for k in range(2):
        ax1.scatter(*x_total[y_total == k][:training_size].T)
    ax1.set_title("Ad-hoc Data")
    plt.show()
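

# A minimal smoke-test sketch (illustrative only; the sizes and ``gap`` value
# are arbitrary demonstration choices): generate a small two-feature dataset
# and check the returned shapes.
if __name__ == "__main__":
    feats_train, lbls_train, feats_test, lbls_test, grid = ad_hoc_data(
        training_size=20,
        test_size=5,
        n=2,
        gap=0.3,
        one_hot=False,
        include_sample_total=True,
    )
    # Both classes are stacked, so each split is twice the requested size.
    assert feats_train.shape == (40, 2) and lbls_train.shape == (40,)
    assert feats_test.shape == (10, 2) and lbls_test.shape == (10,)
    # For n == 2 the labels are computed on a 100 x 100 grid (count = 100).
    assert grid.shape == (100, 100)
    print("ad_hoc_data smoke test passed")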