"""Validation experiment for L2-regularized logistic regression (LIBLINEAR).

The script repeatedly splits the training file into a training part and a
validation part, trains one model per candidate lambda, records which
log10(lambda) achieves the lowest validation 0/1 error, and finally saves a
histogram of the selected log10(lambda) values.
"""
import datetime
import random

import numpy as np

FILENAME = "hw4_train.dat"
OUTPUT_FIGURE = "hw4_11.png"
N_EXPERIMENTS = 128
TRAIN_SIZE = 120
# Candidate values of log10(lambda), in ascending order.
LAMBDA_POWERS = [-6, -4, -2, 0, 2]


def read_data(filename):
    """Read a whitespace-separated data file.

    Every non-blank line holds the feature values followed by the label in
    the last column.

    Args:
        filename: Path of the file to read.

    Returns:
        A pair ``(x, y)``: ``x`` is a list of feature lists (floats) and
        ``y`` is the list of labels converted to ``int``.
    """
    x, y = [], []
    with open(filename) as fp:
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # Blank (e.g. trailing) lines carry no example; indexing
                # numbers[-1] on them would raise IndexError.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y


# NOTE: the name shadows the builtin ``format``; it is kept so that existing
# callers of this module keep working.
def format(features):
    """Convert dense feature vectors to the sparse LIBSVM/LIBLINEAR format.

    Args:
        features: Iterable of dense feature vectors.

    Returns:
        A list with one dict per vector, mapping the 1-based feature index
        to its value; entries equal to ``0.0`` are omitted.
    """
    return [
        {index + 1: value for index, value in enumerate(feature) if value != 0.0}
        for feature in features
    ]


def error(gt, pred):
    """Return the 0/1 error, i.e. the fraction of positions where labels differ.

    Args:
        gt: Sequence of ground-truth labels.
        pred: Sequence of predicted labels, same length as ``gt``.

    Returns:
        Number of mismatches divided by ``len(gt)``.

    Raises:
        ValueError: If ``gt`` is empty or the two lengths differ.
    """
    if len(gt) == 0:
        raise ValueError("cannot compute the error of an empty label list")
    if len(gt) != len(pred):
        raise ValueError(
            "length mismatch: {} labels vs {} predictions".format(len(gt), len(pred))
        )
    mistakes = sum(1 for truth, guess in zip(gt, pred) if truth != guess)
    return mistakes / len(gt)


def new_split(x, y, train_size=TRAIN_SIZE, rng=None):
    """Randomly split the examples into a training and a validation part.

    Args:
        x: Sequence of examples.
        y: Sequence of labels aligned with ``x``.
        train_size: Number of shuffled examples that go to the training part;
            the remainder forms the validation part.
        rng: Optional ``random.Random`` instance used for shuffling. When
            omitted the module-level generator of ``random`` is used, so a
            caller may make runs reproducible with ``random.seed(...)``.

    Returns:
        ``((train_x, train_y), (val_x, val_y))`` where each element is a tuple.

    Raises:
        ValueError: If there are no examples to split.
    """
    data = list(zip(x, y))
    if not data:
        raise ValueError("cannot split an empty data set")
    # The generator is deliberately not re-seeded from the clock here: the
    # random module is already seeded from an OS entropy source (or the
    # current time) at import, and re-seeding would silently discard any
    # seed chosen by the caller.
    shuffler = random if rng is None else rng
    shuffler.shuffle(data)
    shuffled_x, shuffled_y = zip(*data)
    train = (shuffled_x[:train_size], shuffled_y[:train_size])
    val = (shuffled_x[train_size:], shuffled_y[train_size:])
    return train, val


def select_best(results):
    """Pick the entry with the lowest ``'error'``; ties go to the last entry.

    Args:
        results: Non-empty list of dicts with keys ``'lambda'`` and
            ``'error'``.

    Returns:
        The winning dict. When ``results`` is ordered by ascending lambda,
        ties are resolved in favour of the largest lambda.
    """
    # min() returns the first minimum it meets, so scanning in reverse makes
    # the last of several tied entries win.
    return min(reversed(results), key=lambda entry: entry['error'])


def main():
    """Run the repeated validation experiment and save the histogram."""
    # Imported lazily so the helper functions above can be used without
    # liblinear or matplotlib being installed.
    from liblinear.liblinearutil import parameter, predict, problem, train
    import matplotlib.pyplot as plt

    x, y = read_data(FILENAME)
    x = format(x)

    log_lambda = []
    for _ in range(N_EXPERIMENTS):
        (train_x, train_y), (val_x, val_y) = new_split(x, y)
        prob = problem(train_y, train_x)

        results = []
        for lambda_power in LAMBDA_POWERS:
            lambda_value = 10 ** lambda_power
            # The cost parameter is derived from lambda as C = 1 / (2 * lambda).
            param_C = 1 / (2 * lambda_value)
            param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
            model = train(prob, param)
            p_label, _, _ = predict(val_y, val_x, model)
            err = error(val_y, p_label)
            print("0/1 error: ", err)
            print()
            results.append({'lambda': lambda_power, 'error': err})

        ans = select_best(results)
        print("the largest lambda: {}, log_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
        print()
        log_lambda.append(ans['lambda'])

    plt.hist(log_lambda)
    plt.savefig(OUTPUT_FIGURE)


if __name__ == "__main__":
    main()