"""Repeated V-fold cross-validation to pick a regularization strength.

The script loads a whitespace-separated data file, converts the features to
the sparse dict format accepted by LIBLINEAR, and then repeats the following
experiment ``NUM_EXPERIMENTS`` times: shuffle the data into folds, measure the
mean validation error for every candidate ``log10(lambda)``, and record the
candidate with the lowest error (ties go to the largest lambda). A histogram
of the recorded choices is written to ``hw4_12.png``.
"""
import datetime
import random

import matplotlib.pyplot as plt
import numpy as np
from liblinear.liblinearutil import *

FILENAME = "hw4_train.dat"

# Candidate values of log10(lambda) tried in every experiment.
LAMBDA_POWERS = [-6, -4, -2, 0, 2]
# Number of independent shuffle-and-cross-validate repetitions.
NUM_EXPERIMENTS = 128
# Number of samples placed in each cross-validation fold.
FOLD_SIZE = 40


def read_data(filename):
    """Read samples from a whitespace-separated text file.

    Every non-blank line holds the feature values followed by the label as
    the last number on the line.

    Args:
        filename: Path of the data file.

    Returns:
        A pair ``(x, y)`` where ``x`` is a list of feature lists (floats) and
        ``y`` is the list of labels converted to ``int``.
    """
    x, y = [], []
    with open(filename) as fp:
        for line in fp:
            numbers = [float(token) for token in line.split()]
            if not numbers:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            x.append(numbers[:-1])
            y.append(int(numbers[-1]))
    return x, y


def format(features):
    """Convert dense feature rows to the sparse dict form used by LIBSVM.

    The name shadows the builtin ``format``; it is kept so existing callers
    keep working.

    Args:
        features: Iterable of dense feature sequences.

    Returns:
        A list with one dict per row mapping the 1-based feature index to its
        value; features equal to ``0.0`` are omitted.
    """
    return [
        {
            index: value
            for index, value in enumerate(feature, start=1)
            if value != 0.0
        }
        for feature in features
    ]


def error(gt, pred):
    """Return the fraction of positions where ``pred`` differs from ``gt``.

    Args:
        gt: Sequence of ground-truth labels.
        pred: Sequence of predicted labels, at least as long as ``gt``;
            entries beyond ``len(gt)`` are ignored.

    Returns:
        The 0/1 error rate as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``gt`` is empty or ``pred`` is shorter than ``gt``.
    """
    total = len(gt)
    if total == 0:
        raise ValueError("cannot compute an error rate over zero samples")
    if len(pred) < total:
        # zip() below would silently truncate; fail loudly instead.
        raise ValueError("pred has fewer entries than gt")
    mistakes = sum(1 for truth, guess in zip(gt, pred) if truth != guess)
    return mistakes / total


def new_split(x, y, fold_size=40):
    """Shuffle the samples and cut them into consecutive folds.

    The shuffle uses the module-level ``random`` generator without reseeding
    it, so a caller may call ``random.seed(...)`` beforehand to obtain a
    reproducible split.

    Args:
        x: Sequence of samples.
        y: Sequence of labels aligned with ``x``.
        fold_size: Number of samples per fold; the last fold may be smaller.

    Returns:
        A list of ``(fold_x, fold_y)`` pairs, each a pair of tuples. The list
        is empty when there are no samples.

    Raises:
        ValueError: If ``fold_size`` is not positive.
    """
    if fold_size <= 0:
        raise ValueError("fold_size must be a positive integer")

    data = list(zip(x, y))
    if not data:
        # zip(*data) cannot be unpacked into two names when data is empty.
        return []

    random.shuffle(data)
    shuffled_x, shuffled_y = zip(*data)
    return [
        (shuffled_x[head:head + fold_size], shuffled_y[head:head + fold_size])
        for head in range(0, len(shuffled_x), fold_size)
    ]


def _cross_validation_errors(folds, lambda_powers):
    """Return the mean validation error for each candidate log10(lambda).

    Each fold is used once as the validation set while a model is trained on
    the remaining folds for every candidate.
    """
    if not folds:
        raise ValueError("cross-validation needs at least one fold")

    totals = [0.0] * len(lambda_powers)
    for val_index, (val_x, val_y) in enumerate(folds):
        train_x, train_y = [], []
        for fold_index, (fold_x, fold_y) in enumerate(folds):
            if fold_index != val_index:
                train_x.extend(fold_x)
                train_y.extend(fold_y)

        # The training set is the same for every lambda, so build it once.
        prob = problem(train_y, train_x)
        for index, lambda_power in enumerate(lambda_powers):
            lambda_value = 10 ** lambda_power
            # LIBLINEAR takes a cost C rather than lambda; C = 1 / (2 * lambda).
            param_C = 1 / (2 * lambda_value)
            param = parameter('-s 0 -c {} -e 0.000001 -q'.format(param_C))
            model = train(prob, param)
            p_label, _p_acc, _p_val = predict(val_y, val_x, model)
            totals[index] += error(val_y, p_label)

    return [total / len(folds) for total in totals]


def _select_best(results):
    """Return the entry with the lowest error, preferring the largest lambda on ties."""
    return min(results, key=lambda entry: (entry['error'], -entry['lambda']))


def main():
    """Run the repeated cross-validation experiment and save the histogram."""
    x, y = read_data(FILENAME)
    x = format(x)

    log_lambda = []
    for _ in range(NUM_EXPERIMENTS):
        folds = new_split(x, y, FOLD_SIZE)
        mean_errors = _cross_validation_errors(folds, LAMBDA_POWERS)
        results = [
            {'lambda': lambda_power, 'error': mean_error}
            for lambda_power, mean_error in zip(LAMBDA_POWERS, mean_errors)
        ]
        ans = _select_best(results)
        print("the largest lambda: {}, \nlog_10(lambda*): {}".format(10**ans['lambda'], ans['lambda']))
        print()
        log_lambda.append(ans['lambda'])

    plt.hist(log_lambda)
    plt.savefig("hw4_12.png")


if __name__ == "__main__":
    main()