From 110631594b4f79548be6a32c081494d542b695d8 Mon Sep 17 00:00:00 2001
From: snsd0805
Date: Wed, 15 Nov 2023 20:44:06 +0800
Subject: [PATCH] fix: add feature transform

---
 hw4/hw4_10.py | 26 ++++++++++++++++++++++++++
 hw4/hw4_11.py | 30 ++++++++++++++++++++++++++++--
 hw4/hw4_12.py | 26 ++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/hw4/hw4_10.py b/hw4/hw4_10.py
index 38c0fa3..23bcdf7 100644
--- a/hw4/hw4_10.py
+++ b/hw4/hw4_10.py
@@ -33,7 +33,33 @@ def error(gt, pred):
         err = (err+1) if gt[index]!=pred[index] else err
     return err/len(gt)
 
+def transform(features):  # expand each row into [1, x_i, x_i*x_j (i<=j), x_i*x_j*x_k (i<=j<=k)]
+    output_features = []
+    for index, feature in enumerate(features):
+        output_features.append([ 0 for _ in range(84) ])  # 84 = 1 + 6 + 21 + 56 slots, i.e. sized for 6 input features
+        output_features[index][0] = 1
+
+        d_index = 1
+        # 1st-order terms: x_i
+        for i in feature:
+            output_features[index][d_index] = i
+            d_index += 1
+
+        # 2nd-order terms: x_i*x_j for i <= j
+        for i in range(len(feature)):
+            for j in range(i, len(feature)):
+                output_features[index][d_index] = feature[i]*feature[j]
+                d_index += 1
+        # 3rd-order terms: x_i*x_j*x_k for i <= j <= k (product of the values, not of the loop indices)
+        for i in range(len(feature)):
+            for j in range(i, len(feature)):
+                for k in range(j, len(feature)):
+                    output_features[index][d_index] = feature[i]*feature[j]*feature[k]
+                    d_index += 1
+    return output_features
+
 x, y = read_data(FILENAME)
+x = transform(x)
 x = format(x)
 prob = problem(y, x)
 lambda_powers = [-6, -4, -2, 0, 2]
diff --git a/hw4/hw4_11.py b/hw4/hw4_11.py
index 1eacc73..ce52017 100644
--- a/hw4/hw4_11.py
+++ b/hw4/hw4_11.py
@@ -36,7 +36,6 @@ def error(gt, pred):
     return err/len(gt)
 
 def new_split(x, y):
-    random.seed(datetime.datetime.now().timestamp())
     data = list(zip(x, y))
     random.shuffle(data)
     x, y = zip(*data)
@@ -44,10 +43,37 @@ def new_split(x, y):
     train_y, val_y = y[:120], y[120:]
     return (train_x, train_y), (val_x, val_y)
 
+def transform(features):  # expand each row into [1, x_i, x_i*x_j (i<=j), x_i*x_j*x_k (i<=j<=k)]
+    output_features = []
+    for index, feature in enumerate(features):
+        output_features.append([ 0 for _ in range(84) ])  # 84 = 1 + 6 + 21 + 56 slots, i.e. sized for 6 input features
+        output_features[index][0] = 1
+
+        d_index = 1
+        # 1st-order terms: x_i
+        for i in feature:
+            output_features[index][d_index] = i
+            d_index += 1
+
+        # 2nd-order terms: x_i*x_j for i <= j
+        for i in range(len(feature)):
+            for j in range(i, len(feature)):
+                output_features[index][d_index] = feature[i]*feature[j]
+                d_index += 1
+        # 3rd-order terms: x_i*x_j*x_k for i <= j <= k (product of the values, not of the loop indices)
+        for i in range(len(feature)):
+            for j in range(i, len(feature)):
+                for k in range(j, len(feature)):
+                    output_features[index][d_index] = feature[i]*feature[j]*feature[k]
+                    d_index += 1
+    return output_features
+
 x, y = read_data(FILENAME)
+x = transform(x)
 x = format(x)
 log_lambda = []
-for _ in range(128):
+for index in range(128):
+    random.seed(datetime.datetime.now().timestamp()+index)
     (train_x, train_y), (val_x, val_y) = new_split(x, y)
     prob = problem(train_y, train_x)
 
diff --git a/hw4/hw4_12.py b/hw4/hw4_12.py
index edb45e9..59f69ec 100644
--- a/hw4/hw4_12.py
+++ b/hw4/hw4_12.py
@@ -52,7 +52,33 @@ def new_split(x, y):
 
     return folds
 
+def transform(features):  # expand each row into [1, x_i, x_i*x_j (i<=j), x_i*x_j*x_k (i<=j<=k)]
+    output_features = []
+    for index, feature in enumerate(features):
+        output_features.append([ 0 for _ in range(84) ])  # 84 = 1 + 6 + 21 + 56 slots, i.e. sized for 6 input features
+        output_features[index][0] = 1
+
+        d_index = 1
+        # 1st-order terms: x_i
+        for i in feature:
+            output_features[index][d_index] = i
+            d_index += 1
+
+        # 2nd-order terms: x_i*x_j for i <= j
+        for i in range(len(feature)):
+            for j in range(i, len(feature)):
+                output_features[index][d_index] = feature[i]*feature[j]
+                d_index += 1
+        # 3rd-order terms: x_i*x_j*x_k for i <= j <= k (product of the values, not of the loop indices)
+        for i in range(len(feature)):
+            for j in range(i, len(feature)):
+                for k in range(j, len(feature)):
+                    output_features[index][d_index] = feature[i]*feature[j]*feature[k]
+                    d_index += 1
+    return output_features
+
 x, y = read_data(FILENAME)
+x = transform(x)
 x = format(x)
 log_lambda = []
 lambda_powers = [-6, -4, -2, 0, 2]