# NTU_HTML/hw3/hw3_10.py

import numpy as np
import matplotlib.pyplot as plt
import time


def generate_data(N):
    """Draw N labelled samples from a two-class Gaussian mixture.

    Labels are picked from {+1, -1} with ``np.random.choice``.  A +1 sample
    gets its two features from a Gaussian with mean (3, 2) and covariance
    0.4 * I; any other label uses mean (5, 0) and covariance 0.6 * I.  A
    constant 1 is placed in front of the two features as the bias coordinate.

    Args:
        N: number of samples to generate.

    Returns:
        A pair ``(x, y)``: ``x`` has shape (N, 3) with rows ``[1, x1, x2]``
        and ``y`` has shape (N,) holding the labels.
    """
    labels = np.random.choice([1, -1], N)

    # (mean, covariance) of the feature distribution for each class.
    positive = ([3, 2], [[0.4, 0], [0, 0.4]])
    negative = ([5, 0], [[0.6, 0], [0, 0.6]])

    features = np.empty((N, 3))
    # One draw per sample, in label order, so a fixed seed always yields the
    # same data set.
    for row, label in enumerate(labels):
        center, spread = positive if label == 1 else negative
        first, second = np.random.multivariate_normal(center, spread)
        features[row] = np.array([1, first, second])
    return features, labels

def average_square_error(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` differ.

    Despite the name, the value is a 0/1 (misclassification) error rate: each
    position contributes 1 when the two labels disagree and 0 when they
    agree, and the contributions are averaged.

    Args:
        y: array-like of labels.
        y_hat: array-like of labels to compare against ``y``.

    Returns:
        The mean number of mismatches, a float in [0, 1] (NaN for empty
        input).
    """
    # A mismatch (!=) is what counts as an error; comparing with == would
    # report the accuracy instead.
    mismatches = np.asarray(y) != np.asarray(y_hat)
    return np.mean(mismatches)

if __name__ == '__main__':
    # Experiment: repeatedly fit linear regression (pseudo-inverse solution)
    # on a fresh training set, score the sign of its outputs on that same
    # training set, and plot the distribution of the scores over all runs.
    n_runs = 128
    n_train = 256

    errors = []
    for times in range(n_runs):
        # Seeding with the run index makes every run reproducible.
        np.random.seed(times)
        train_x, train_y = generate_data(n_train)           # (n_train, 3), (n_train, )

        # Least-squares weights: w = pinv(X) @ y.
        pseudo_inverse_x = np.linalg.pinv(train_x)          # (3, n_train)
        w = pseudo_inverse_x @ train_y                      # (3)

        # Turn the real-valued regression output into +1/-1 predictions.
        predict_y = np.sign(train_x @ w)

        error = average_square_error(predict_y, train_y)
        errors.append(error)
        print(times, error)

    # np.median stays correct for any number of runs (odd or even).
    median = np.median(errors)

    plt.hist(errors, bins=10)
    plt.xlabel("Ein")
    plt.title("median: {}".format(median))
    plt.savefig("10.png")