diff --git a/hw4/hw4_10.py b/hw4/hw4_10.py
index 9071a7d..9aa7bd8 100644
--- a/hw4/hw4_10.py
+++ b/hw4/hw4_10.py
@@ -33,32 +33,58 @@ def error(gt, pred):
         err = (err+1) if gt[index]!=pred[index] else err
     return err/len(gt)
 
-if __name__ == '__main__':
-    x, y = read_data(FILENAME)
-    x = form(x)
-    prob = problem(y, x)
-    lambda_powers = [-6, -4, -2, 0, 2]
+def transform(features):
+    """Expand each sample into its 3rd-order polynomial feature vector.
+
+    Every output row is [1, all x_i, all x_i*x_j (i <= j), all
+    x_i*x_j*x_k (i <= j <= k)]; six raw features give 1 + 6 + 21 + 56 = 84
+    values.
+    """
+    output_features = []
+    for feature in features:
+        n = len(feature)
+        row = [1]
+        # 1st-order terms
+        row.extend(feature)
+        # 2nd-order terms
+        for i in range(n):
+            for j in range(i, n):
+                row.append(feature[i]*feature[j])
+        # 3rd-order terms: multiply the feature values, not the loop indices
+        for i in range(n):
+            for j in range(i, n):
+                for k in range(j, n):
+                    row.append(feature[i]*feature[j]*feature[k])
+        output_features.append(row)
+    return output_features
 
-    results = []
-    for lambda_power in lambda_powers:
-        lambda_value = 10 ** lambda_power
-        param_C = 1/(2*lambda_value)
-        param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
-        model = train(prob, param)
-        p_label, p_acc, p_val = predict(y, x, model)
-        err = error(y, p_label)
-        print("0/1 error: ", err)
-        print()
-        results.append({'lambda': lambda_power, 'error': err})
+x, y = read_data(FILENAME)
+x = transform(x)
+x = form(x)
+prob = problem(y, x)
+lambda_powers = [-6, -4, -2, 0, 2]
 
-    ans, min_err = None, 1
-    for i in results:
-        print(i)
-        if i['error'] <= min_err:
-            min_err = i['error']
-            ans = i
+results = []
+for lambda_power in lambda_powers:
+    lambda_value = 10 ** lambda_power
+    param_C = 1/(2*lambda_value)
+    param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
+    model = train(prob, param)
+    p_label, p_acc, p_val = predict(y, x, model)
+    err = error(y, p_label)
+    print("0/1 error: ", err)
+    print()
+    results.append({'lambda': lambda_power, 'error': err})
 
-    print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
+ans, min_err = None, 1
+for i in results:
+    print(i['error'])
+    # `<=` lets a later (larger-lambda) entry win ties
+    if i['error'] <= min_err:
+        min_err = i['error']
+        ans = i
+
+print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
 
 
 
diff --git a/hw4/hw4_11.py b/hw4/hw4_11.py
index ba8276f..306d4d5 100644
--- a/hw4/hw4_11.py
+++ b/hw4/hw4_11.py
@@ -43,10 +43,37 @@ def new_split(x, y):
     train_y, val_y = y[:120], y[120:]
     return (train_x, train_y), (val_x, val_y)
 
+def transform(features):
+    """Expand each sample into its 3rd-order polynomial feature vector.
+
+    Every output row is [1, all x_i, all x_i*x_j (i <= j), all
+    x_i*x_j*x_k (i <= j <= k)]; six raw features give 1 + 6 + 21 + 56 = 84
+    values.
+    """
+    output_features = []
+    for feature in features:
+        n = len(feature)
+        row = [1]
+        # 1st-order terms
+        row.extend(feature)
+        # 2nd-order terms
+        for i in range(n):
+            for j in range(i, n):
+                row.append(feature[i]*feature[j])
+        # 3rd-order terms: multiply the feature values, not the loop indices
+        for i in range(n):
+            for j in range(i, n):
+                for k in range(j, n):
+                    row.append(feature[i]*feature[j]*feature[k])
+        output_features.append(row)
+    return output_features
+
 x, y = read_data(FILENAME)
+x = transform(x)
 x = format(x)
 log_lambda = []
-for i in range(128):
+for index in range(128):
+    # reseed once per round, before the split (presumably new_split draws from `random` - confirm)
+    random.seed(datetime.datetime.now().timestamp()+index)
     (train_x, train_y), (val_x, val_y) = new_split(x, y)
-    random.seed(datetime.datetime.now().timestamp()+i)
     prob = problem(train_y, train_x)
diff --git a/hw4/hw4_12.py b/hw4/hw4_12.py
index edb45e9..59f69ec 100644
--- a/hw4/hw4_12.py
+++ b/hw4/hw4_12.py
@@ -52,7 +52,33 @@ def new_split(x, y):
 
     return folds
 
+def transform(features):
+    """Expand each sample into its 3rd-order polynomial feature vector.
+
+    Every output row is [1, all x_i, all x_i*x_j (i <= j), all
+    x_i*x_j*x_k (i <= j <= k)]; six raw features give 1 + 6 + 21 + 56 = 84
+    values.
+    """
+    output_features = []
+    for feature in features:
+        n = len(feature)
+        row = [1]
+        # 1st-order terms
+        row.extend(feature)
+        # 2nd-order terms
+        for i in range(n):
+            for j in range(i, n):
+                row.append(feature[i]*feature[j])
+        # 3rd-order terms: multiply the feature values, not the loop indices
+        for i in range(n):
+            for j in range(i, n):
+                for k in range(j, n):
+                    row.append(feature[i]*feature[j]*feature[k])
+        output_features.append(row)
+    return output_features
+
 x, y = read_data(FILENAME)
+x = transform(x)
 x = format(x)
 log_lambda = []
 lambda_powers = [-6, -4, -2, 0, 2]
diff --git a/hw5/hw5_10.py b/hw5/hw5_10.py
new file mode 100644
index 0000000..d0878a3
--- /dev/null
+++ b/hw5/hw5_10.py
@@ -0,0 +1,51 @@
+import numpy as np
+import datetime
+import random
+from libsvm.svmutil import *
+import matplotlib.pyplot as plt
+
+FILENAME = "satimage.scale"
+TEST_FILENAME = "satimage.scale.t"
+TARGET = 1
+
+def new_label(y, target):
+    """Return binary labels: 1 where the entry of y equals target, else 0."""
+    ans = []
+    for i in y:
+        if i == target:
+            ans.append(1)
+        else:
+            ans.append(0)
+    return ans
+
+def error(predict, gt):
+    """Return the fraction of positions where predict and gt differ."""
+    error_count = 0
+    for index in range(len(predict)):
+        if predict[index] != gt[index]:
+            error_count += 1
+    return error_count / len(predict)
+
+if __name__ == '__main__':
+
+    y, x = svm_read_problem(FILENAME)
+    y = new_label(y, TARGET)
+
+    test_y, test_x = svm_read_problem(TEST_FILENAME)
+    test_y = new_label(test_y, TARGET)
+
+    for c in [0.01, 0.1, 1, 10, 100]:
+        print("C=", c)
+        prob = svm_problem(y, x)
+        # -s 0: C-SVC, -t 2: RBF kernel, -g 1: gamma = 1, -q: quiet
+        param = svm_parameter('-s 0 -t 2 -g 1 -c {} -q'.format(c))
+        m = svm_train(prob, param)
+
+        p_label, p_acc, p_val = svm_predict(test_y, test_x, m)
+        my_error = error(p_label, test_y)
+        print("p_acc:", p_acc)
+        print("0/1 error:", my_error)
+
+
+        print("="*20)
+
diff --git a/hw5/hw5_9.py b/hw5/hw5_9.py
new file mode 100644
index 0000000..13d058e
--- /dev/null
+++ b/hw5/hw5_9.py
@@ -0,0 +1,32 @@
+import numpy as np
+import datetime
+import random
+from libsvm.svmutil import *
+import matplotlib.pyplot as plt
+
+FILENAME = "satimage.scale"
+TEST_FILENAME = "satimage.scale.t"
+
+def new_label(y, target):
+    """Return binary labels: 1 where the entry of y equals target, else 0."""
+    ans = []
+    for i in y:
+        if i == target:
+            ans.append(1)
+        else:
+            ans.append(0)
+    return ans
+
+if __name__ == '__main__':
+    y, x = svm_read_problem(FILENAME)
+    y = new_label(y, 4)
+
+    for c in [0.1, 1, 10]:
+        for q in [2, 3, 4]:
+            print("(C, Q)=({}, {})".format(c, q))
+            prob = svm_problem(y, x)
+            # -s 0: C-SVC, -t 1: polynomial kernel, -d: its degree
+            param = svm_parameter('-s 0 -t 1 -d {} -c {}'.format(q, c))
+            m = svm_train(prob, param)
+            print("="*20)
+