feat: add hw3
This commit is contained in:
parent
1c6f295d85
commit
b3ddd2d11e
51
hw3/hw3_10.py
Normal file
51
hw3/hw3_10.py
Normal file
@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
def generate_data(N):
|
||||
y = np.random.choice([1, -1], N)
|
||||
|
||||
|
||||
x = np.empty((N, 3))
|
||||
for index, i in enumerate(y):
|
||||
if i == 1:
|
||||
mean = [3, 2]
|
||||
covariance = [[0.4, 0], [0, 0.4]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
else:
|
||||
mean = [5, 0]
|
||||
covariance = [[0.6, 0], [0, 0.6]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
return x, y
|
||||
|
||||
def average_square_error(y, y_hat):
|
||||
error = (y==y_hat)
|
||||
return error.sum()/error.shape[0]
|
||||
|
||||
if __name__ == '__main__':
|
||||
errors = []
|
||||
for times in range(128):
|
||||
np.random.seed(times)
|
||||
train_x, train_y = generate_data(256) # (256, 3), (256, )
|
||||
test_x, test_y = generate_data(4096)
|
||||
|
||||
pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256)
|
||||
w = pseudo_inverse_x @ train_y # (3)
|
||||
|
||||
predict_y = train_x @ w
|
||||
predict_y = np.sign(predict_y)
|
||||
|
||||
error = average_square_error(predict_y, train_y)
|
||||
errors.append(error)
|
||||
print(times, error)
|
||||
|
||||
errors = sorted(errors)
|
||||
median = ( errors[63] + errors[64] ) / 2
|
||||
|
||||
plt.hist(errors, bins=10)
|
||||
plt.xlabel("Ein")
|
||||
plt.title("median: {}".format(median))
|
||||
plt.savefig("10.png")
|
||||
89
hw3/hw3_11.py
Normal file
89
hw3/hw3_11.py
Normal file
@ -0,0 +1,89 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
def generate_data(N):
|
||||
y = np.random.choice([1, -1], N)
|
||||
|
||||
|
||||
x = np.empty((N, 3))
|
||||
for index, i in enumerate(y):
|
||||
if i == 1:
|
||||
mean = [3, 2]
|
||||
covariance = [[0.4, 0], [0, 0.4]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
else:
|
||||
mean = [5, 0]
|
||||
covariance = [[0.6, 0], [0, 0.6]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
return x, y
|
||||
|
||||
def average_square_error(y, y_hat):
|
||||
error = (y!=y_hat)
|
||||
return error.sum()/error.shape[0]
|
||||
|
||||
def sigmoid(s):
|
||||
return 1/(1+np.exp(-s))
|
||||
|
||||
def gradient(w, x, y):
|
||||
y = y.reshape((-1, 1))
|
||||
theta = sigmoid( - x@w * y )
|
||||
yx = -y * x
|
||||
result = theta * yx
|
||||
|
||||
grad = np.mean(result, axis=0)
|
||||
return grad.reshape((3, 1))
|
||||
|
||||
if __name__ == '__main__':
|
||||
linear_regression_errors = []
|
||||
logistic_regression_errors = []
|
||||
|
||||
for times in range(128):
|
||||
|
||||
# generate data
|
||||
np.random.seed(times)
|
||||
train_x, train_y = generate_data(256) # (256, 3), (256, )
|
||||
test_x, test_y = generate_data(4096)
|
||||
|
||||
# linear regression
|
||||
pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256)
|
||||
w = pseudo_inverse_x @ train_y # (3)
|
||||
|
||||
predict_y = test_x @ w
|
||||
predict_y = np.sign(predict_y)
|
||||
|
||||
error = average_square_error(predict_y, test_y)
|
||||
linear_regression_errors.append(error)
|
||||
print(times, error)
|
||||
|
||||
# logistic regression
|
||||
lr = 0.1
|
||||
T = 500
|
||||
w0 = np.zeros((3, 1))
|
||||
for iter in range(T):
|
||||
grad = gradient(w0, train_x, train_y)
|
||||
w0 -= lr * grad
|
||||
w0 = w0.reshape((3))
|
||||
|
||||
predict_y = test_x @ w0
|
||||
predict_y = np.sign(predict_y)
|
||||
|
||||
error = average_square_error(predict_y, test_y)
|
||||
logistic_regression_errors.append(error)
|
||||
print(times, error)
|
||||
print()
|
||||
|
||||
|
||||
linear_regression_errors = sorted(linear_regression_errors)
|
||||
logistic_regression_errors = sorted(logistic_regression_errors)
|
||||
linear_regression_median = linear_regression_errors[63] + linear_regression_errors[64]
|
||||
logistic_regression_median = logistic_regression_errors[63] + logistic_regression_errors[64]
|
||||
|
||||
plt.scatter(linear_regression_errors, logistic_regression_errors)
|
||||
plt.xlabel("linear regression error")
|
||||
plt.xlabel("logistic regression error")
|
||||
plt.title("linear regression: {}\nlogistic regression: {}".format(linear_regression_median, logistic_regression_median))
|
||||
plt.savefig("11.png")
|
||||
103
hw3/hw3_12.py
Normal file
103
hw3/hw3_12.py
Normal file
@ -0,0 +1,103 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
def generate_data(N):
|
||||
y = np.random.choice([1, -1], N)
|
||||
|
||||
x = np.empty((N, 3))
|
||||
for index, i in enumerate(y):
|
||||
if i == 1:
|
||||
mean = [3, 2]
|
||||
covariance = [[0.4, 0], [0, 0.4]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
else:
|
||||
mean = [5, 0]
|
||||
covariance = [[0.6, 0], [0, 0.6]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
return x, y
|
||||
|
||||
def generate_outliers(N):
|
||||
y = np.ones((N))
|
||||
|
||||
x = np.empty((N, 3))
|
||||
for index, i in enumerate(y):
|
||||
mean = [0, 6]
|
||||
covariance = [[0.1, 0], [0, 0.3]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
return x, y
|
||||
|
||||
def average_square_error(y, y_hat):
|
||||
error = (y!=y_hat)
|
||||
return error.sum()/error.shape[0]
|
||||
|
||||
def sigmoid(s):
|
||||
return 1/(1+np.exp(-s))
|
||||
|
||||
def gradient(w, x, y):
|
||||
y = y.reshape((-1, 1))
|
||||
theta = sigmoid( - x@w * y )
|
||||
yx = -y * x
|
||||
result = theta * yx
|
||||
|
||||
grad = np.mean(result, axis=0)
|
||||
return grad.reshape((3, 1))
|
||||
|
||||
if __name__ == '__main__':
|
||||
linear_regression_errors = []
|
||||
logistic_regression_errors = []
|
||||
|
||||
for times in range(128):
|
||||
|
||||
# generate data
|
||||
np.random.seed(times)
|
||||
train_x, train_y = generate_data(256) # (256, 3), (256, )
|
||||
test_x, test_y = generate_data(4096)
|
||||
outlier_x, outlier_y = generate_outliers(16)
|
||||
|
||||
train_x = np.concatenate((train_x, outlier_x), axis=0)
|
||||
train_y = np.concatenate((train_y, outlier_y), axis=0)
|
||||
|
||||
# linear regression
|
||||
pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256)
|
||||
w = pseudo_inverse_x @ train_y # (3)
|
||||
|
||||
predict_y = test_x @ w
|
||||
predict_y = np.sign(predict_y)
|
||||
|
||||
error = average_square_error(predict_y, test_y)
|
||||
linear_regression_errors.append(error)
|
||||
print(times, error)
|
||||
|
||||
# logistic regression
|
||||
lr = 0.1
|
||||
T = 500
|
||||
w0 = np.zeros((3, 1))
|
||||
for iter in range(T):
|
||||
grad = gradient(w0, train_x, train_y)
|
||||
w0 -= lr * grad
|
||||
w0 = w0.reshape((3))
|
||||
|
||||
predict_y = test_x @ w0
|
||||
predict_y = np.sign(predict_y)
|
||||
|
||||
error = average_square_error(predict_y, test_y)
|
||||
logistic_regression_errors.append(error)
|
||||
print(times, error)
|
||||
print()
|
||||
|
||||
|
||||
linear_regression_errors = sorted(linear_regression_errors)
|
||||
logistic_regression_errors = sorted(logistic_regression_errors)
|
||||
linear_regression_median = linear_regression_errors[63] + linear_regression_errors[64]
|
||||
logistic_regression_median = logistic_regression_errors[63] + logistic_regression_errors[64]
|
||||
|
||||
plt.scatter(linear_regression_errors, logistic_regression_errors)
|
||||
plt.xlabel("linear regression error")
|
||||
plt.xlabel("logistic regression error")
|
||||
plt.title("linear regression: {}\nlogistic regression: {}".format(linear_regression_median, logistic_regression_median))
|
||||
plt.savefig("12.png")
|
||||
51
hw3/hw3_9.py
Normal file
51
hw3/hw3_9.py
Normal file
@ -0,0 +1,51 @@
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
def generate_data(N):
|
||||
y = np.random.choice([1, -1], N)
|
||||
|
||||
|
||||
x = np.empty((N, 3))
|
||||
for index, i in enumerate(y):
|
||||
if i == 1:
|
||||
mean = [3, 2]
|
||||
covariance = [[0.4, 0], [0, 0.4]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
else:
|
||||
mean = [5, 0]
|
||||
covariance = [[0.6, 0], [0, 0.6]]
|
||||
x1, x2 = np.random.multivariate_normal(mean, covariance)
|
||||
x[index] = np.array([1, x1, x2])
|
||||
return x, y
|
||||
|
||||
def average_square_error(y, y_hat):
|
||||
error = y-y_hat
|
||||
error = error ** 2
|
||||
return error.sum()/error.shape[0]
|
||||
|
||||
if __name__ == '__main__':
|
||||
errors = []
|
||||
for times in range(128):
|
||||
np.random.seed(times)
|
||||
train_x, train_y = generate_data(256) # (256, 3), (256, )
|
||||
test_x, test_y = generate_data(4096)
|
||||
|
||||
pseudo_inverse_x = np.linalg.pinv(train_x) # (3, 256)
|
||||
w = pseudo_inverse_x @ train_y # (3)
|
||||
|
||||
predict_y = train_x @ w
|
||||
|
||||
error = average_square_error(predict_y, train_y)
|
||||
errors.append(error)
|
||||
print(times, error)
|
||||
|
||||
errors = sorted(errors)
|
||||
median = ( errors[63] + errors[64] ) / 2
|
||||
|
||||
plt.hist(errors, bins=10)
|
||||
plt.xlabel("Ein")
|
||||
plt.title("median: {}".format(median))
|
||||
plt.savefig("9.png")
|
||||
Loading…
Reference in New Issue
Block a user