English
Languages
English
Bengali
French
Hindi
Italian
Japanese
Korean
Malayalam
Russian
Spanish
Tamil
Turkish
Vietnamese
Shortcuts

Source code for qiskit_machine_learning.datasets.wine

# This code is part of Qiskit.
#
# (C) Copyright IBM 2018, 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""
wine dataset
"""

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from qiskit.exceptions import MissingOptionalLibraryError
from .dataset_helper import features_and_labels_transform


[docs]def wine(training_size, test_size, n, plot_data=False, one_hot=True): """returns wine dataset""" class_labels = [r"A", r"B", r"C"] data, target = datasets.load_wine(return_X_y=True) sample_train, sample_test, label_train, label_test = train_test_split( data, target, test_size=test_size, random_state=7 ) # Now we standardize for gaussian around 0 with unit variance std_scale = StandardScaler().fit(sample_train) sample_train = std_scale.transform(sample_train) sample_test = std_scale.transform(sample_test) # Now reduce number of features to number of qubits pca = PCA(n_components=n).fit(sample_train) sample_train = pca.transform(sample_train) sample_test = pca.transform(sample_test) # Scale to the range (-1,+1) samples = np.append(sample_train, sample_test, axis=0) minmax_scale = MinMaxScaler((-1, 1)).fit(samples) sample_train = minmax_scale.transform(sample_train) sample_test = minmax_scale.transform(sample_test) # Pick training size number of samples from each distro training_input = { key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels) } test_input = { key: (sample_test[label_test == k, :])[:test_size] for k, key in enumerate(class_labels) } training_feature_array, training_label_array = features_and_labels_transform( training_input, class_labels, one_hot ) test_feature_array, test_label_array = features_and_labels_transform( test_input, class_labels, one_hot ) if plot_data: try: import matplotlib.pyplot as plt except ImportError as ex: raise MissingOptionalLibraryError( libname="Matplotlib", name="wine", pip_install="pip install matplotlib" ) from ex for k in range(0, 3): plt.scatter( sample_train[label_train == k, 0][:training_size], sample_train[label_train == k, 1][:training_size], ) plt.title("PCA dim. reduced Wine dataset") plt.show() return ( training_feature_array, training_label_array, test_feature_array, test_label_array, )

© Copyright 2018, 2021, Qiskit Machine Learning Development Team.

Built with Sphinx using a theme provided by Read the Docs.