"""Linear regression used as a binary classifier on synthetic 2-D Gaussian data.

Running this file as a script repeats the experiment over several seeds,
prints each in-sample 0/1 error, and saves a histogram of those errors.
"""
import time

import numpy as np

# Experiment configuration used by the script entry point.
N_TRIALS = 128
N_TRAIN = 256

# Per-class Gaussian parameters: label -> (mean, covariance).
_CLASS_PARAMS = {
    1: ([3, 2], [[0.4, 0], [0, 0.4]]),
    -1: ([5, 0], [[0.6, 0], [0, 0.6]]),
}


def generate_data(N):
    """Draw ``N`` labelled samples from a two-class Gaussian mixture.

    Each label is drawn uniformly from ``{1, -1}``.  A sample with label
    ``1`` has its two features drawn from a Gaussian with mean ``[3, 2]``
    and covariance ``0.4 * I``; a sample with label ``-1`` uses mean
    ``[5, 0]`` and covariance ``0.6 * I``.

    Args:
        N: Number of samples to generate.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(N, 3)`` with a constant
        ``1`` in column 0 (bias term) followed by the two features, and
        ``y`` has shape ``(N,)`` holding the ``1`` / ``-1`` labels.
    """
    y = np.random.choice([1, -1], N)
    x = np.empty((N, 3))
    # Samples are drawn one at a time, in label order, so that a given
    # np.random seed keeps producing exactly the same data set.
    for index, label in enumerate(y):
        mean, covariance = _CLASS_PARAMS[1 if label == 1 else -1]
        x1, x2 = np.random.multivariate_normal(mean, covariance)
        x[index] = (1, x1, x2)
    return x, y


def average_square_error(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` differ.

    This is the 0/1 classification error.  The previous implementation
    compared with ``==`` and therefore returned the accuracy instead.

    Args:
        y: Array-like of labels or predictions.
        y_hat: Array-like of the same shape as ``y``.

    Returns:
        The mismatch rate as a float in ``[0, 1]`` (``nan`` for empty input).
    """
    mismatches = np.asarray(y) != np.asarray(y_hat)
    return float(np.mean(mismatches))


def run_trial(seed, n_train=N_TRAIN):
    """Fit least-squares weights on one seeded data set; return its in-sample error.

    Args:
        seed: Value passed to ``np.random.seed`` before generating data.
        n_train: Number of training samples to generate.

    Returns:
        The 0/1 error of ``sign(x @ w)`` against the training labels.
    """
    np.random.seed(seed)
    train_x, train_y = generate_data(n_train)  # (n_train, 3), (n_train,)
    # Least-squares solution via the pseudo-inverse: w = pinv(X) @ y.
    w = np.linalg.pinv(train_x) @ train_y  # (3,)
    # np.sign yields 0 for a score of exactly 0, which counts as an error.
    predict_y = np.sign(train_x @ w)
    return average_square_error(predict_y, train_y)


def main():
    """Run all trials, print each error, and save the histogram to ``10.png``."""
    # Imported here so the numeric helpers above can be imported and tested
    # without a plotting backend being available.
    import matplotlib.pyplot as plt

    errors = []
    for times in range(N_TRIALS):
        error = run_trial(times)
        errors.append(error)
        print(times, error)

    # np.median averages the two middle values for an even-length sample,
    # so it stays correct if N_TRIALS is changed.
    median = np.median(errors)

    plt.hist(errors, bins=10)
    plt.xlabel("Ein")
    plt.title("median: {}".format(median))
    plt.savefig("10.png")


if __name__ == '__main__':
    main()