# -*- coding: utf-8 -*-
"""SVM-Perceptron.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1PxSPdx848AJrp5vsVYbGL7F_bSiiaHSa
# Config Environment
"""
import pandas as pd
from sklearn.decomposition import PCA
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Perceptron
from sklearn.metrics.cluster import rand_score
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.svm import SVC, LinearSVC
"""# Normalizer
Normalize non-float numbers and remove NAN or Inf values and replace them with zero.
due to the small number of NANS we could also drop them
Class to help normalize data
"""
class DataNormalizer:
    # Indices of the categorical columns: label (23), gender (1),
    # customer type (2), type of travel (4), class (5).
    CATEGORICAL_COLUMN_INDEXES = [23, 1, 2, 4, 5]

    def __init__(self, raw_data):
        self.raw_data = raw_data

    def _encode_column(self, column_index, print_stats=False):
        # Map each distinct value in the column to an integer, in sorted order.
        available_values = sorted(set(self.raw_data.values[:, column_index]))
        value_to_number = {val: index for index, val in enumerate(available_values)}
        if print_stats:
            print(value_to_number)
        column_name = self.raw_data.columns[column_index]
        for key, dedicated_number in value_to_number.items():
            self.raw_data.loc[self.raw_data[column_name] == key, column_name] = dedicated_number

    def normalize_raw_data(self, print_stats=False):
        for column_index in self.CATEGORICAL_COLUMN_INDEXES:
            self._encode_column(column_index, print_stats)
        # Replace +/-Inf with NaN, then drop the few rows containing NaN.
        self.raw_data.replace([np.inf, -np.inf], np.nan, inplace=True)
        return self.raw_data.dropna()
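"""Aside: a sketch (not used below) showing that pandas' `factorize` with `sort=True` produces the same sorted integer encoding per column as DataNormalizer; the toy frame here is hypothetical."""
_demo = pd.DataFrame({'cls': ['Eco', 'Business', 'Eco Plus', 'Eco']})
# sort=True assigns codes in the sorted order of the unique values:
# Business -> 0, Eco -> 1, Eco Plus -> 2.
print(pd.factorize(_demo['cls'], sort=True)[0])  # [1 0 2 1]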
"""# Load Data
Load Dataframe from csv files and provide needed data for models
Class to help extract features and labels from loaded data
"""
class DataLoader:
    def __init__(self):
        self.raw_data_filePath = '/content/drive/MyDrive/AI/SVM-Perceptron/data/train.csv'
        self.test_data_filePath = '/content/drive/MyDrive/AI/SVM-Perceptron/data/test.csv'
        self.raw_data = None
        self.test_data = None
        self.train_data_features = None
        self.train_data_labels = None
        self.test_data_features = None
        self.test_data_labels = None

    # Load the raw data from CSV, drop the index column, then normalize it
    # (encode categoricals, replace Inf with NaN, and drop NaN rows).
    def load_raw_data(self):
        if self.raw_data is None:
            self.raw_data = pd.read_csv(self.raw_data_filePath)
            self.raw_data = self.raw_data.drop(self.raw_data.columns[[0]], axis=1)
            normalizer = DataNormalizer(self.raw_data)
            self.raw_data = normalizer.normalize_raw_data()
            print('Raw Data Shape: ', self.raw_data.shape)
        return self.raw_data

    def load_test_data(self):
        if self.test_data is None:
            self.test_data = pd.read_csv(self.test_data_filePath)
            self.test_data = self.test_data.drop(self.test_data.columns[[0]], axis=1)
            normalizer = DataNormalizer(self.test_data)
            self.test_data = normalizer.normalize_raw_data()
            print('Test Data Shape: ', self.test_data.shape)
        return self.test_data

    def load_train_data_features(self):
        if self.train_data_features is None:
            # Columns 1..22 are the features; column 23 is the label.
            self.train_data_features = self.raw_data.values[:, 1:23].astype('float32')
            print('Train Data Shape', self.train_data_features.shape)
        return self.train_data_features

    def load_train_data_labels(self):
        if self.train_data_labels is None:
            self.train_data_labels = self.raw_data.values[:, 23].astype('float32')
            print('Train Data Labels Shape', self.train_data_labels.shape)
        return self.train_data_labels

    def load_test_data_features(self):
        if self.test_data_features is None:
            self.test_data_features = self.test_data.values[:, 1:23].astype('float32')
            print('Test Data Shape', self.test_data_features.shape)
        return self.test_data_features

    def load_test_data_labels(self):
        if self.test_data_labels is None:
            self.test_data_labels = self.test_data.values[:, 23].astype('float32')
            print('Test Data Labels Shape', self.test_data_labels.shape)
        return self.test_data_labels
loader = DataLoader()
raw_data = loader.load_raw_data()
test_data = loader.load_test_data()
"""Load Raw Data without any normalization or manipulations"""
train_data_features = loader.load_train_data_features()
train_data_labels = loader.load_train_data_labels()
"""Load Normalized and preprocessed dataframe for train data"""
test_data_features = loader.load_test_data_features()
test_data_labels = loader.load_test_data_labels()
"""Load Normalized and preprocessed dataframe for test data
# Perceptron
Test perceptron models with different hyperparam and analysis the result
"""
class PerceptronTest:
    def __init__(self, train_data_features, train_data_labels):
        self.perceptron_model = None
        self.train_data_features = train_data_features
        self.train_data_labels = train_data_labels

    def simple_test(self, test_data_features, test_data_labels):
        self.perceptron_model = Perceptron(random_state=1)
        self.perceptron_model.fit(self.train_data_features, self.train_data_labels)
        print(f"Perceptron model score: {self.perceptron_model.score(self.train_data_features, self.train_data_labels) * 100}")
        predicted_labels = np.array(self.perceptron_model.predict(test_data_features))
        print('rand score of prediction', rand_score(test_data_labels, predicted_labels) * 100)
        print('accuracy of prediction', accuracy_score(test_data_labels, predicted_labels) * 100)

    def test_best(self, eta, max_iter, test_data_features, test_data_labels):
        self.perceptron_model = Perceptron(random_state=20, eta0=eta, max_iter=max_iter)
        self.perceptron_model.fit(self.train_data_features, self.train_data_labels)
        print(f"Perceptron model score: {self.perceptron_model.score(self.train_data_features, self.train_data_labels) * 100}")
        predicted_labels = np.array(self.perceptron_model.predict(test_data_features))
        print('rand score of prediction', rand_score(test_data_labels, predicted_labels) * 100)
        print('accuracy of prediction', accuracy_score(test_data_labels, predicted_labels) * 100)

    def learning_rate_test(self, grid=None):
        perceptron_model = Perceptron(random_state=20)
        cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
        if not grid:
            grid = {'eta0': [0.0001, 0.001, 0.01, 0.1, 5.0]}
        search = GridSearchCV(perceptron_model, grid, scoring='accuracy', cv=cv, n_jobs=-1)
        results = search.fit(self.train_data_features, self.train_data_labels)
        print('Mean Accuracy: %.3f' % results.best_score_)
        print('Best Learning Rate: %s' % results.best_params_)
        means = results.cv_results_['mean_test_score']
        params = [dic['eta0'] for dic in results.cv_results_['params']]
        plt.plot(params, means)
        plt.suptitle('Learning Rate On Score')
        plt.ylabel("Mean Test Score")
        plt.xlabel("Learning Rate")
        plt.show()

    def iteration_test(self, grid=None):
        perceptron_model = Perceptron(eta0=0.01)
        cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
        if not grid:
            grid = {'max_iter': [1, 10, 100, 1000, 10000]}
        search = GridSearchCV(perceptron_model, grid, scoring='accuracy', cv=cv, n_jobs=-1)
        results = search.fit(self.train_data_features, self.train_data_labels)
        print('Mean Accuracy: %.3f' % results.best_score_)
        print('Best Iteration Count: %s' % results.best_params_)
        means = results.cv_results_['mean_test_score']
        params = [dic['max_iter'] for dic in results.cv_results_['params']]
        plt.plot(params, means)
        plt.suptitle('Iterations On Score')
        plt.ylabel("Mean Test Score")
        plt.xlabel("Iterations")
        plt.show()
perceptron_test = PerceptronTest(train_data_features, train_data_labels)
perceptron_test.simple_test(test_data_features, test_data_labels)
perceptron_test.learning_rate_test()
"""The hyperparameters for the Perceptron algorithm must be configured for our dataset.
Perhaps the most important hyperparameter is the learning rate.
A large learning rate can cause the model to learn fast, but perhaps at the cost of lower skill. A smaller learning rate can result in a better-performing model but may take a long time to train the model.
"""
perceptron_test.iteration_test()
"""Another important hyperparameter is how many epochs are used to train the model.
This may depend on the training dataset and could vary greatly. Again, we will explore configuration values on a log scale between 1 and 1e+4.
"""
perceptron_test.test_best(0.01, 100, test_data_features, test_data_labels)
"""Final result for Perceptron with the best hyperparams
# SVM Test
class for testing different kinds of svm in sklearn
"""
class SVMTest:
    def __init__(self, svm_model, train_data_features, train_data_labels):
        self.svm_model = svm_model
        self.train_data_features = train_data_features
        self.train_data_labels = train_data_labels

    def test_svm(self, test_data_features, test_data_labels):
        start = time.time()
        # Fit the model
        self.svm_model.fit(self.train_data_features, self.train_data_labels)
        # Evaluate on the train data
        train_predict = self.svm_model.predict(self.train_data_features)
        train_purity = accuracy_score(self.train_data_labels, train_predict)
        train_rand_index = adjusted_rand_score(self.train_data_labels, train_predict)
        # Evaluate on the test data
        test_predict = self.svm_model.predict(test_data_features)
        test_purity = accuracy_score(test_data_labels, test_predict)
        test_rand_index = adjusted_rand_score(test_data_labels, test_predict)
        print(f'Train Data Accuracy : {round(train_purity * 100, 2)}%')
        print(f'Train Data Rand-Index : {round(train_rand_index * 100, 2)}%')
        print(f'Test Data Accuracy : {round(test_purity * 100, 2)}%')
        print(f'Test Data Rand-Index : {round(test_rand_index * 100, 2)}%')
        print('Time Taken(s): ', time.time() - start)
        return train_purity, train_rand_index, test_purity, test_rand_index
"""# SVC vs LinearSVC
SVC has multiple kernels which allows us to classify non-linear data. <br>
the simplest kernel is the linear kernel. according to the documentation for the `SVC` on `sklearn` website,
> *The fit time scales at least quadratically with the number of samples and may be impractical beyond tens of thousands of samples. For large datasets consider using LinearSVC* <br>
And since we are working on a dataset with 100 thousand data with 22 dimension, it would take a lot of time for fitting our dataset with the normal `SVC` class. <br>
But we have to consider that the `LinearSVC` class is just for the linear kernel, so for other kernels we have to use the `SVC` class.
"""
linear_kernel_svm = SVMTest(SVC(kernel='linear'), train_data_features, train_data_labels)
linear_kernel_svm.test_svm(test_data_features, test_data_labels)
linear_kernel_svm = SVMTest(LinearSVC(), train_data_features, train_data_labels)
linear_kernel_svm.test_svm(test_data_features, test_data_labels)
"""In the above code I used both the `SVC` class with linear kernel and the `LinearSVC`. <br>
The execution time for the normal `SVC` is over 5 hour, but the `LinearSVC` has only taken 5 minutes with 10,000 iteration to execute. <br>
Although the accuracy of the `SVC` class is better, but it is not as much different as the execution time, and we are sure that if we improve the accuracy of `LinearSVC`, by using the same parameters we will also improve the `SVC` model too. <br>
Therefore I chose the `LinearSVC` model for the following codes. But as soon as we are done the linear kernel, we will switch back to the `SVC` class. <br>
And also the `LinearSVC` uses the one-vs-all technique and unfortunately it can not be changed, so I used the normal `SVC` for testing this parameter.
# LinearSVC
"""
duals = [True, False]
for dual in duals:
    linear_kernel_svm = SVMTest(LinearSVC(dual=dual), train_data_features, train_data_labels)
    print('dual: ', dual)
    linear_kernel_svm.test_svm(test_data_features, test_data_labels)
    print(' ')
"""The first parameter that I test is the `dual` parameter. <br>
The duality optimization is a theory which divide the solution of a problem into a *'dual*' solution and a *'primal'* solution. <br>
The primal solution is the main and more complete solution which will also take longer time, but the dual solution is a solution which is one step before the main solution. <br>
The properties of dual solution is that its answer is too much close to the main solution, even sometimes better, and it will also produce the answer in a much slower time. <br>
According to the documentation of the `sklearn`:
> Select the algorithm to either solve the dual or primal optimization problem. Prefer `dual=False` when `n_samples > n_features`.
`n_samples` means the number of samples of our train dataset, which is 100 thousand and the `n_features` means the dimension of our data, which is 22. <br>
So `sklearn` suggests us to set `dual=False` for our problem, and also according to the code above, the `dual=False` has a slightly more accurate answer.
"""
coeffs = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 1.5, 2]
train_purities = []
train_rand_indexes = []
test_purities = []
test_rand_indexes = []
for c in coeffs:
    linear_kernel_svm = SVMTest(LinearSVC(dual=False, C=c), train_data_features, train_data_labels)
    train_purity, train_rand_index, test_purity, test_rand_index = linear_kernel_svm.test_svm(test_data_features, test_data_labels)
    train_purities.append(train_purity)
    train_rand_indexes.append(train_rand_index)
    test_purities.append(test_purity)
    test_rand_indexes.append(test_rand_index)
    print()
plt.plot(coeffs, train_rand_indexes)
plt.suptitle('C on Train Rand-Index')
plt.show()
plt.plot(coeffs, train_purities, 'tab:orange')
plt.suptitle('C on Train Purity')
plt.show()
plt.plot(coeffs, test_rand_indexes, 'tab:green')
plt.suptitle('C on Test Rand-Index')
plt.show()
plt.plot(coeffs, test_purities, 'tab:red')
plt.suptitle('C on Test Purity')
plt.show()
"""The above code shows the effect of `C` parameter on accuracy. <br>
Parameter `C` can be viewed as a way to control overfitting. It trades off the relative importance of maximizing the margin and fitting the training data. <br>
It is for controlling the effect of slack variables. slack variables are defined for measuring the misclassification of difficult or noisy examples. <br>
As you can see in the plots, by increasing this value, we will have a better accuracy for our training data, but less accuracy for test data. <br>
This behaviour was expected, because we are allowing more slack variables and noises to our fitting process, which will lead to less accuracy for external data. ( data which are not in the train dataset) <br>
According to the plots the best value for this parameter is 1.5. <br>
# SVC
Up to now all our codes have used the `LinearSVC` class and we have figured out that the best parameters are `C=0.4` and `dual=False`. <br>
These values will also have the best effect on the normal `SVC`, because they are the same but with different implementation. <br>
As I said, and just like the documentations said, the execution time of `SVC` for our full dataset will be too much, therefore I chose 10% of data randomly and work with them, we can use the result for the whole dataset and get better accuracy. <br>
"""
# Randomly sample ~10% of the train set and test set (indices drawn with replacement).
seq = np.random.randint(0, 103594, 10000)
X_train_sample = train_data_features[seq]
y_train_sample = train_data_labels[seq]
print(X_train_sample.shape)
print(y_train_sample.shape)
seq = np.random.randint(0, 25893, 5000)
X_test_sample = test_data_features[seq]
y_test_sample = test_data_labels[seq]
print(X_test_sample.shape)
print(y_test_sample.shape)
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
ax[0].hist(y_train_sample)
ax[0].title.set_text('sample')
ax[1].hist(train_data_labels)
ax[1].title.set_text('original')
plt.show()
"""First thing to check is the belivabilty of our sample dataset. <br>
In the code above, I chose 10% of datset randomly and shows the histogram of original and random dataset. we can see that they are somehow the same and it means that we can count on the result and improvments that we apply to the random dataset.
"""
kernels = ['poly', 'rbf', 'sigmoid', 'linear']
train_purities = []
train_rand_indexes = []
test_purities = []
test_rand_indexes = []
def show_bar_chart(x, y, title, color):
    fig = plt.figure()
    ax = fig.add_axes([0, 0, 1, 1])
    ax.title.set_text(title)
    ax.bar(x, y, color=color, width=0.3)
    plt.show()
for kernel in kernels:
    svm = SVC(kernel=kernel)
    svm_test = SVMTest(svm, X_train_sample, y_train_sample)
    train_purity, train_rand_index, test_purity, test_rand_index = svm_test.test_svm(X_test_sample, y_test_sample)
    train_purities.append(train_purity)
    train_rand_indexes.append(train_rand_index)
    test_purities.append(test_purity)
    test_rand_indexes.append(test_rand_index)
    print()
show_bar_chart(kernels, train_purities, 'Train purities', 'tab:blue')
show_bar_chart(kernels, train_rand_indexes, 'Train rand indexes', 'tab:olive')
show_bar_chart(kernels, test_purities, 'Test purities', 'tab:pink')
show_bar_chart(kernels, test_rand_indexes, 'Test rand indexes', 'tab:purple')
"""In this section I used the different kernels of `sklearn` and show their effect on accuracy. <br>
As it is shown in the bar plots, for the train dataset the result of linear, polynomial and gaussian(rbf) is the same but the sigmoid kernel has lower accuracy. <br>
But on the test data the difference of gaussian(rbf) kernel is observable and it has the best accuracy over other kernels. <br>
As the rbf kernel was the best kernel, for following codes I used this kernel.
> Important Note: the default parameter for `p` in polynomial kernel is 3 and default `gamma` for the rbf is `1 / (n_features * X.var())`
"""
decision_function_shape = ['ovo', 'ovr']
for dfs in decision_function_shape:
    svm = SVC(kernel='rbf', decision_function_shape=dfs)
    print(dfs)
    svm_test = SVMTest(svm, X_train_sample, y_train_sample)
    train_purity, train_rand_index, test_purity, test_rand_index = svm_test.test_svm(test_data_features, test_data_labels)
    print()
"""The default technique for multiclass classification used by `SVC` is one-vs-all or one-vs-rest. <br>
In this code block I used both techniques and show the results of them. <br>
As you can see they are the exact same and we can not say which one is better. So we keep using the default value which is one-vs-rest.
"""
gammas = ['scale', 'auto']
for gamma in gammas:
    svm = SVC(kernel='rbf', gamma=gamma)
    print(gamma)
    svm_test = SVMTest(svm, X_train_sample, y_train_sample)
    train_purity, train_rand_index, test_purity, test_rand_index = svm_test.test_svm(test_data_features, test_data_labels)
    print()
"""The formual for rbf or gaussian kernel is below: <br>
![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABJCAYAAADL0IO8AAAFEElEQVR4nO2cP4zbVBzHvw8xcGNpc5WqOEyFSBUM2AJOV0TFkDgVElJVloRmuJPullwyZrS7JWOvt9xQBuixIiY4BgRiKEjJhCJCVTEkp0j0dL0xLPAYEjvx2Ukc2+/5OX0fyUri+N3PL1//3u/3/h2hlFJIhOGVuG9A4kQKIhhSEMGQggiGFEQwpCCMaDZ3UVAICCEgREPZNH2Vk4IwYvBjG7hjgFKKotrGV49+w26zubDcqxzu7aXkwXHLfn99U4ey/xzD4XBhOekhPLiUQsrnpbEKQghxvCaZuXU5PwXJ3cSXPuKI9BAOPHsKqB9f83WtjCGM2ctn0E1to1Wv+7peeghD9vIZfItP0XpswDTLyOdrC8tID2FESSP4ug0AByDkAIACvbq1sByJc/idEAJKqf2aZKKqi2yyBEMKIhhSEMEQSpCSZg3GjQbkbt/O2J8z+b3E2loGobKsoxYF7OwEuPLeFqr/PsH5xoavXq6otpaCxohlfvo2Go0dqqdBAdB0Ok2VXIWZ/ShtedUlCEI1WQBQrx/ig49UAMDJyVXc2rjsu6yzGfI4tFJktlghnCAA8NrbKvQ0ALTx05Mzz2vMWgEZojomfo5aFJTOOVpHgWw5bWZQqDFs0kL5V0gww83Nkko1TaMAKKDSe4bB7B6isjWrLssinIeYtQIedd/C3bvv2k+u12zbqHnyPzUaxhYwNSWrFHzN/AUmlJwhwYWnqqhi/KQqNJdTx+/HR1qnO42GXdao6lRRi4FtL2PLwiypMwP/xboERbi0193Ke3PW7WA9u83FlsXT7nPc2GQb+IVrsvzQbO7i2R9A6jK/AUnTLKPbTiF1ia2dRAoyHPTQwQ1krq3xM3p+ilM1y7zTmEhBzrod9Pvfo/37P1xtrmffZG5HqBjil4fHPTzkaM9uIrPsm8hEeggvTLMM7Q0dg8Ff6Kx/iO8esB/jkoLMYW1tDan/jrG/f45bn1znYlNO4UZEVHWJVRCJm8iarGVHWiXexOIhq7B0NAq8fvpY0l7ZSs5GZlmCIQXxIOjupyiQgngQdPdTFCRy6IQ1QXc/RYH0kEUssfspCqQgi1hi91MUJF4QZwAmIBGvCllm91MkhJoAjplGY4fqikL1qjH5nB7Nk1vnwlDJKVQtmaH/zjIk2kOGgx5wc8seFq/XD/H+HR0K+uh056+xWkSQ3U+RwFV+DhjGPaoCnk+2UdWpMr26xHFMvGqyIsX9HWtWT5CqThXFvYynklMcP2yjsUPzGbaL8IKwcoIUVffTbJZUz1WJlZxCEWJtFwsSHUMucv9zDX9mtx1Trc3mLn79uQ0lN2Obwd8vuPXC/bAygljLQluPDcf54aCHzgmwnrriKvPirA9cfR2HPveQ88C3IKZZhibopJNZK+CLb4DCZ+9MzhULc/sjZq2AX9oK9E0+c+W+WdSmTW9qsTKX6XMsN9T4YWbmNF6fa9/r1HpdqwzvPoYf5goyqaw7IFqpYZyCjIK1dxo7fV9WKjz5XrzsymKmIJNKuLMWL6+RRMNMQeZ5wCinh3Ap4yrgGdRHAQ8AVMe+O2sg7+CHPqAWPbeISULipZI9dDD2gOkmiucwwsuISxDnjz85ZKzgg2sKdzgc4vRk9F7JVdA75rnOXBKop27Fkjj/BcXK4uU208PPF7MsmWGxxVOQ+fMGMp6wxEfHcLa3SKJHbkcQjJUZfl8VpCCCIQURDCmIYEhBBEMKIhj/A8O+eVeiLk1lAAAAAElFTkSuQmCC) <br>
It has a paramter called gamma. `sklearn` library gives us two choice for setting this parameter. <br>
`scale` value, which is the default value, means `1 / (n_features * X.var())` and the `auto` value is `1 / n_features`. <br>
As the result shows, `scale` value has better accuracy, which was expected because the `scale` method consider the value of each data, rather than just the number of features of dataser.
"""
Cs = [1, 10, 100, 1000, 10000]
train_purities = []
train_rand_indexes = []
test_purities = []
test_rand_indexes = []
for C in Cs:
    svm = SVC(kernel='rbf', C=C)
    print(C)
    svm_test = SVMTest(svm, X_train_sample, y_train_sample)
    train_purity, train_rand_index, test_purity, test_rand_index = svm_test.test_svm(test_data_features, test_data_labels)
    print()
    train_purities.append(train_purity)
    train_rand_indexes.append(train_rand_index)
    test_purities.append(test_purity)
    test_rand_indexes.append(test_rand_index)
plt.plot(Cs, train_rand_indexes)
plt.suptitle('C on Train Rand-Index')
plt.show()
plt.plot(Cs, train_purities, 'tab:orange')
plt.suptitle('C on Train Purity')
plt.show()
plt.plot(Cs, test_rand_indexes, 'tab:green')
plt.suptitle('C on Test Rand-Index')
plt.show()
plt.plot(Cs, test_purities, 'tab:red')
plt.suptitle('C on Test Purity')
plt.show()
"""For the linear kernel, best value for parameter C was 10000. <br>
But this value will be different for the rbf kernel since. <br>
As you can see in the plots the best value for value when using rbf kernel, is 5 and it is the value we will use in the following codes.
# Conclusion
After all the tests on different kernels and paramater changing, we saw that on 10% of the whole dataset which is selected randomly, the rbf or gaussian kernel has the best performance. <br>
And for the rbf kernel, `c=10000.0` and `gamma='scale'`, which is the default value, has better accuracy. <br>
Also we saw that the scores for both one-vs-one and one-vs-all techniques was the same. But we choose one-vs-all because it was the default value and prefered by `sklearn` community. <br>
So now we expect that if we set these parameters to the whole dataset, we would have a high accuracy. <br>
"""
rbf_svm = SVMTest(SVC(kernel='rbf', C=10000), train_data_features, train_data_labels)
rbf_svm.test_svm(test_data_features, test_data_labels)