diff --git a/hw3/hw3_10.py b/hw3/hw3_10.py new file mode 100644 index 0000000..73b4091 --- /dev/null +++ b/hw3/hw3_10.py @@ -0,0 +1,51 @@ +import numpy as np +import matplotlib.pyplot as plt +import time + + +def generate_data(N): + y = np.random.choice([1, -1], N) + + + x = np.empty((N, 3)) + for index, i in enumerate(y): + if i == 1: + mean = [3, 2] + covariance = [[0.4, 0], [0, 0.4]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + else: + mean = [5, 0] + covariance = [[0.6, 0], [0, 0.6]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + return x, y + +def average_square_error(y, y_hat): + error = (y==y_hat) + return error.sum()/error.shape[0] + +if __name__ == '__main__': + errors = [] + for times in range(128): + np.random.seed(times) + train_x, train_y = generate_data(256) # (256, 3), (256, ) + test_x, test_y = generate_data(4096) + + pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256) + w = pseudo_inverse_x @ train_y # (3) + + predict_y = train_x @ w + predict_y = np.sign(predict_y) + + error = average_square_error(predict_y, train_y) + errors.append(error) + print(times, error) + + errors = sorted(errors) + median = ( errors[63] + errors[64] ) / 2 + + plt.hist(errors, bins=10) + plt.xlabel("Ein") + plt.title("median: {}".format(median)) + plt.savefig("10.png") \ No newline at end of file diff --git a/hw3/hw3_11.py b/hw3/hw3_11.py new file mode 100644 index 0000000..ef13f00 --- /dev/null +++ b/hw3/hw3_11.py @@ -0,0 +1,89 @@ +import numpy as np +import matplotlib.pyplot as plt +import time + + +def generate_data(N): + y = np.random.choice([1, -1], N) + + + x = np.empty((N, 3)) + for index, i in enumerate(y): + if i == 1: + mean = [3, 2] + covariance = [[0.4, 0], [0, 0.4]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + else: + mean = [5, 0] + covariance = [[0.6, 0], [0, 0.6]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + return x, y + +def average_square_error(y, y_hat): + error = (y!=y_hat) + return error.sum()/error.shape[0] + +def sigmoid(s): + return 1/(1+np.exp(-s)) + +def gradient(w, x, y): + y = y.reshape((-1, 1)) + theta = sigmoid( - x@w * y ) + yx = -y * x + result = theta * yx + + grad = np.mean(result, axis=0) + return grad.reshape((3, 1)) + +if __name__ == '__main__': + linear_regression_errors = [] + logistic_regression_errors = [] + + for times in range(128): + + # generate data + np.random.seed(times) + train_x, train_y = generate_data(256) # (256, 3), (256, ) + test_x, test_y = generate_data(4096) + + # linear regression + pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256) + w = pseudo_inverse_x @ train_y # (3) + + predict_y = test_x @ w + predict_y = np.sign(predict_y) + + error = average_square_error(predict_y, test_y) + linear_regression_errors.append(error) + print(times, error) + + # logistic regression + lr = 0.1 + T = 500 + w0 = np.zeros((3, 1)) + for iter in range(T): + grad = gradient(w0, train_x, train_y) + w0 -= lr * grad + w0 = w0.reshape((3)) + + predict_y = test_x @ w0 + predict_y = np.sign(predict_y) + + error = average_square_error(predict_y, test_y) + logistic_regression_errors.append(error) + print(times, error) + print() + + + linear_regression_errors = sorted(linear_regression_errors) + logistic_regression_errors = sorted(logistic_regression_errors) + linear_regression_median = linear_regression_errors[63] + linear_regression_errors[64] + logistic_regression_median = logistic_regression_errors[63] + logistic_regression_errors[64] + + plt.scatter(linear_regression_errors, logistic_regression_errors) + plt.xlabel("linear regression error") + plt.xlabel("logistic regression error") + plt.title("linear regression: {}\nlogistic regression: {}".format(linear_regression_median, logistic_regression_median)) + plt.savefig("11.png") \ No newline at end of file diff --git a/hw3/hw3_12.py b/hw3/hw3_12.py new file mode 100644 index 0000000..65fd4f9 --- /dev/null +++ b/hw3/hw3_12.py @@ -0,0 +1,103 @@ +import numpy as np +import matplotlib.pyplot as plt +import time + + +def generate_data(N): + y = np.random.choice([1, -1], N) + + x = np.empty((N, 3)) + for index, i in enumerate(y): + if i == 1: + mean = [3, 2] + covariance = [[0.4, 0], [0, 0.4]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + else: + mean = [5, 0] + covariance = [[0.6, 0], [0, 0.6]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + return x, y + +def generate_outliers(N): + y = np.ones((N)) + + x = np.empty((N, 3)) + for index, i in enumerate(y): + mean = [0, 6] + covariance = [[0.1, 0], [0, 0.3]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + return x, y + +def average_square_error(y, y_hat): + error = (y!=y_hat) + return error.sum()/error.shape[0] + +def sigmoid(s): + return 1/(1+np.exp(-s)) + +def gradient(w, x, y): + y = y.reshape((-1, 1)) + theta = sigmoid( - x@w * y ) + yx = -y * x + result = theta * yx + + grad = np.mean(result, axis=0) + return grad.reshape((3, 1)) + +if __name__ == '__main__': + linear_regression_errors = [] + logistic_regression_errors = [] + + for times in range(128): + + # generate data + np.random.seed(times) + train_x, train_y = generate_data(256) # (256, 3), (256, ) + test_x, test_y = generate_data(4096) + outlier_x, outlier_y = generate_outliers(16) + + train_x = np.concatenate((train_x, outlier_x), axis=0) + train_y = np.concatenate((train_y, outlier_y), axis=0) + + # linear regression + pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256) + w = pseudo_inverse_x @ train_y # (3) + + predict_y = test_x @ w + predict_y = np.sign(predict_y) + + error = average_square_error(predict_y, test_y) + linear_regression_errors.append(error) + print(times, error) + + # logistic regression + lr = 0.1 + T = 500 + w0 = np.zeros((3, 1)) + for iter in range(T): + grad = gradient(w0, train_x, train_y) + w0 -= lr * grad + w0 = w0.reshape((3)) + + predict_y = test_x @ w0 + predict_y = np.sign(predict_y) + + error = average_square_error(predict_y, test_y) + logistic_regression_errors.append(error) + print(times, error) + print() + + + linear_regression_errors = sorted(linear_regression_errors) + logistic_regression_errors = sorted(logistic_regression_errors) + linear_regression_median = linear_regression_errors[63] + linear_regression_errors[64] + logistic_regression_median = logistic_regression_errors[63] + logistic_regression_errors[64] + + plt.scatter(linear_regression_errors, logistic_regression_errors) + plt.xlabel("linear regression error") + plt.xlabel("logistic regression error") + plt.title("linear regression: {}\nlogistic regression: {}".format(linear_regression_median, logistic_regression_median)) + plt.savefig("12.png") \ No newline at end of file diff --git a/hw3/hw3_9.py b/hw3/hw3_9.py new file mode 100644 index 0000000..3d0466b --- /dev/null +++ b/hw3/hw3_9.py @@ -0,0 +1,51 @@ +import numpy as np +import matplotlib.pyplot as plt +import time + + +def generate_data(N): + y = np.random.choice([1, -1], N) + + + x = np.empty((N, 3)) + for index, i in enumerate(y): + if i == 1: + mean = [3, 2] + covariance = [[0.4, 0], [0, 0.4]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + else: + mean = [5, 0] + covariance = [[0.6, 0], [0, 0.6]] + x1, x2 = np.random.multivariate_normal(mean, covariance) + x[index] = np.array([1, x1, x2]) + return x, y + +def average_square_error(y, y_hat): + error = y-y_hat + error = error ** 2 + return error.sum()/error.shape[0] + +if __name__ == '__main__': + errors = [] + for times in range(128): + np.random.seed(times) + train_x, train_y = generate_data(256) # (256, 3), (256, ) + test_x, test_y = generate_data(4096) + + pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256) + w = pseudo_inverse_x @ train_y # (3) + + predict_y = train_x @ w + + error = average_square_error(predict_y, train_y) + errors.append(error) + print(times, error) + + errors = sorted(errors) + median = ( errors[63] + errors[64] ) / 2 + + plt.hist(errors, bins=10) + plt.xlabel("Ein") + plt.title("median: {}".format(median)) + plt.savefig("9.png") \ No newline at end of file