Ting-Jun Wang 2024-06-11 15:00:17 +08:00
commit becfb3e0f3
Signed by: snsd0805
GPG Key ID: 48D331A3D6160354
8 changed files with 517 additions and 0 deletions

DPO.py Normal file

@@ -0,0 +1,129 @@
import gc
import os
import json
import utils
import torch
import wandb
from tqdm.auto import tqdm
from trl import DPOTrainer
from datasets import load_dataset
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
from transformers import TrainingArguments, TextStreamer
def DPO_train(args, output_dir):
wandb.login(key=args.wandb_token)
wandb.init(project="hw6_rlhf",
name=f"{args.exp_name}_{args.model_name.split('/')[1]}")
torch_dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device\n")
# Load dataset
# ================================DO NOT CHANGE!================================
dataset = load_dataset("Intel/orca_dpo_pairs", split="train")
dataset = dataset.rename_column('question', 'prompt')
dataset = dataset.train_test_split(test_size=0.01)
with open("./test_prompt.json", 'r') as f:
test_data = json.load(f)
# ================================DO NOT CHANGE!================================
# Model
# model, tokenizer = FastLanguageModel.from_pretrained(model_name=args.model_name,...)
utils.YOUR_CODE_HERE
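# Illustrative sketch only (not the assignment solution): the base model could be
# loaded the same way inference.py does later in this commit, e.g.
#   model, tokenizer = FastLanguageModel.from_pretrained(
#       model_name=args.model_name,
#       max_seq_length=args.max_length,
#       dtype=torch_dtype,
#       load_in_4bit=True,
#   )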
# Perform model patching and add fast LoRA weights
# model = FastLanguageModel.get_peft_model(model,...)
utils.YOUR_CODE_HERE
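# Illustrative sketch only; the LoRA hyperparameter values below are assumptions,
# not values taken from this repository:
#   model = FastLanguageModel.get_peft_model(
#       model,
#       r=16,
#       target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
#                       "gate_proj", "up_proj", "down_proj"],
#       lora_alpha=16,
#       lora_dropout=0,
#       bias="none",
#       use_gradient_checkpointing=True,
#       random_state=args.seed,
#   )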
# Training arguments
training_args = TrainingArguments(
per_device_train_batch_size=args.train_batch_size,
per_device_eval_batch_size=args.eval_batch_size,
gradient_accumulation_steps=args.gradient_accumulation_steps,
gradient_checkpointing=True,
learning_rate=args.lr,
lr_scheduler_type=args.lr_scheduler_type,
max_steps=args.max_steps,
num_train_epochs=args.num_epochs,
optim=args.optimizer,
weight_decay=args.weight_decay,
max_grad_norm=args.max_grad_norm,
warmup_ratio=args.warmup_ratio,
fp16=not is_bfloat16_supported(),
bf16=is_bfloat16_supported(),
logging_strategy=args.logging_strategy,
logging_steps=args.logging_steps,
evaluation_strategy=args.evaluation_strategy,
eval_steps=args.eval_steps,
output_dir=output_dir,
save_strategy=args.save_strategy,
report_to=args.report_to
)
# Trainer
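# (Given the train_test_split above, dataset["train"] and dataset["test"] are the
# natural candidates for the two YOUR_CODE_HERE fields below; this is an assumption,
# not part of the original file.)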
dpo_trainer = DPOTrainer(
model=model,
tokenizer=tokenizer,
train_dataset=utils.YOUR_CODE_HERE,
eval_dataset=utils.YOUR_CODE_HERE,
args=training_args,
beta=args.beta,
max_length=args.max_length,
max_prompt_length=args.max_prompt_length
)
# Fine-tune model with DPO
dpo_trainer.train()
# Save model
model.save_pretrained(output_dir)
# Inference
FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer)
output_data = []
for data in tqdm(test_data):
print("=============Generated Answer After Fine-tuning=============\n")
print(f"Question {data['id']}:\n"+data["prompt"])
prompt = utils.alpaca_prompt.format(
"You are a helpful assistant chatbot.", # Instruction
data["prompt"], # Input
"", # Response, leave empty for generation
)
prompt = tokenizer(prompt, return_tensors="pt").to("cuda")
generated_sequences = model.generate(**prompt, streamer=text_streamer,
max_new_tokens=500)
# Decode the generated output
generated_text = tokenizer.batch_decode(
generated_sequences, skip_special_tokens=True)[0]
print("==============================================================\n")
# Store the output in a list
output_data.append({
"id": data["id"],
"prompt": data["prompt"],
"generated_text": generated_text
})
# Ensure the submission directory exists
submission_dir = "submission"
if not os.path.exists(submission_dir):
os.makedirs(submission_dir)
# Write the output data to a JSON file
output_file = os.path.join(submission_dir, f"DPO_{args.model_name.split('/')[1]}.json")
utils.write_json(output_data, output_file)
# Flush memory
del dpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

ORPO.py Normal file

@@ -0,0 +1,129 @@
import gc
import os
import json
import utils
import torch
import wandb
from tqdm.auto import tqdm
from datasets import load_dataset
from transformers import TextStreamer
from unsloth import FastLanguageModel
from trl import ORPOConfig, ORPOTrainer
from unsloth import is_bfloat16_supported
def ORPO_train(args, output_dir):
wandb.login(key=args.wandb_token)
wandb.init(project="hw6_rlhf",
name=f"{args.exp_name}_{args.model_name.split('/')[1]}")
torch_dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device\n")
# Model
# model, tokenizer = FastLanguageModel.from_pretrained(args.model_name,...)
utils.YOUR_CODE_HERE
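# Illustrative sketch only (not the assignment solution), following the pattern used
# in inference.py:
#   model, tokenizer = FastLanguageModel.from_pretrained(
#       model_name=args.model_name,
#       max_seq_length=args.max_length,
#       dtype=torch_dtype,
#       load_in_4bit=True,
#   )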
# Load dataset
# ================================DO NOT CHANGE!================================
dataset = load_dataset("Intel/orca_dpo_pairs", split="train")
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
dataset = dataset.map(utils.format_prompt, fn_kwargs={"EOS_TOKEN": EOS_TOKEN})
dataset = dataset.train_test_split(test_size=0.01)
with open("./test_prompt.json", 'r') as f:
test_data = json.load(f)
# ================================DO NOT CHANGE!================================
# Perform model patching and add fast LoRA weights
# model = FastLanguageModel.get_peft_model(model,...)
utils.YOUR_CODE_HERE
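# Illustrative sketch only; same assumed LoRA settings as the sketch in DPO.py:
#   model = FastLanguageModel.get_peft_model(
#       model, r=16, lora_alpha=16, lora_dropout=0, bias="none",
#       target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
#                       "gate_proj", "up_proj", "down_proj"],
#       use_gradient_checkpointing=True, random_state=args.seed,
#   )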
# Training arguments
training_args = ORPOConfig(
per_device_train_batch_size=args.train_batch_size,
per_device_eval_batch_size=args.eval_batch_size,
gradient_accumulation_steps=args.gradient_accumulation_steps,
beta=args.beta,
learning_rate=args.lr,
lr_scheduler_type=args.lr_scheduler_type,
max_steps=args.max_steps,
num_train_epochs=args.num_epochs,
optim=args.optimizer,
weight_decay=args.weight_decay,
max_grad_norm=args.max_grad_norm,
warmup_ratio=args.warmup_ratio,
max_length=args.max_length,
max_prompt_length=args.max_prompt_length,
max_completion_length=args.max_prompt_length,
fp16=not is_bfloat16_supported(),
bf16=is_bfloat16_supported(),
logging_strategy=args.logging_strategy,
logging_steps=args.logging_steps,
evaluation_strategy=args.evaluation_strategy,
eval_steps=args.eval_steps,
output_dir=output_dir,
save_strategy=args.save_strategy,
report_to=args.report_to
)
# Trainer
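# (As in DPO.py, dataset["train"] and dataset["test"] are the natural candidates for
# the two YOUR_CODE_HERE fields below; this is an assumption, not part of the
# original file.)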
orpo_trainer = ORPOTrainer(
model=model,
tokenizer=tokenizer,
train_dataset=utils.YOUR_CODE_HERE,
eval_dataset=utils.YOUR_CODE_HERE,
args=training_args,
)
# Fine-tune model with ORPO
orpo_trainer.train()
# Save model
model.save_pretrained(output_dir)
# Inference
FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer)
output_data = []
for data in tqdm(test_data):
print("=============Generated Answer After Fine-tuning=============\n")
print(f"Question {data['id']}:\n"+data["prompt"])
prompt = utils.alpaca_prompt.format(
"You are a helpful assistant chatbot.", # Instruction
data["prompt"], # Input
"", # Response, leave empty for generation
)
prompt = tokenizer(prompt, return_tensors="pt").to("cuda")
generated_sequences = model.generate(**prompt, streamer=text_streamer,
max_new_tokens=500)
# Decode the generated output
generated_text = tokenizer.batch_decode(
generated_sequences, skip_special_tokens=True)[0]
print("==============================================================\n")
# Store the output in a list
output_data.append({
"id": data["id"],
"prompt": data["prompt"],
"generated_text": generated_text
})
# Ensure the submission directory exists
submission_dir = "submission"
if not os.path.exists(submission_dir):
os.makedirs(submission_dir)
# Write the output data to a JSON file
output_file = os.path.join(submission_dir, f"ORPO_{args.model_name.split('/')[1]}.json")
utils.write_json(output_data, output_file)
# Flush memory
del orpo_trainer, model
gc.collect()
torch.cuda.empty_cache()

inference.py Normal file

@@ -0,0 +1,78 @@
import gc
import os
import json
import utils
import torch
import wandb
from tqdm.auto import tqdm
from unsloth import FastLanguageModel
from transformers import TextStreamer
from unsloth import is_bfloat16_supported
def LLM_inference(args):
wandb.login(key=args.wandb_token)
wandb.init(project="hw6_rlhf",
name=f"{args.exp_name}_{args.model_name.split('/')[1]}")
torch_dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device\n")
# Load dataset
# ================================DO NOT CHANGE!================================
with open("./test_prompt.json", 'r') as f:
test_data = json.load(f)
# ================================DO NOT CHANGE!================================
# Model
model, tokenizer = FastLanguageModel.from_pretrained(
model_name=args.model_name,
max_seq_length=args.max_length,
dtype=torch_dtype,
load_in_4bit=True,
)
# Inference
FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer)
output_data = []
for data in tqdm(test_data):
print("=============Generated Answer After Fine-tuning=============\n")
print(f"Question {data['id']}:\n"+data["prompt"])
prompt = utils.alpaca_prompt.format(
"You are a helpful assistant chatbot.", # Instruction
data["prompt"], # Input
"", # Response, leave empty for generation
)
prompt = tokenizer(prompt, return_tensors="pt").to("cuda")
generated_sequences = model.generate(**prompt, streamer=text_streamer,
max_new_tokens=500)
# Decode the generated output
generated_text = tokenizer.batch_decode(
generated_sequences, skip_special_tokens=True)[0]
print("==============================================================\n")
# Store the output in a list
output_data.append({
"id": data["id"],
"prompt": data["prompt"],
"generated_text": generated_text
})
# Ensure the submission directory exists
submission_dir = "submission"
if not os.path.exists(submission_dir):
os.makedirs(submission_dir)
# Write the output data to a JSON file
output_file = os.path.join(submission_dir, f"{args.model_name.split('/')[1]}.json")
utils.write_json(output_data, output_file)
# Flush memory
del model, tokenizer
gc.collect()
torch.cuda.empty_cache()

inference.sh Normal file

@@ -0,0 +1,6 @@
#!/bin/bash
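# Example usage (illustrative; the model names are the choices defined in main.py):
#   bash inference.sh unsloth/llama-3-8b-bnb-4bit <YOUR_WANDB_TOKEN>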
python main.py \
--model_name "${1}" \
--inference_base_model \
--wandb_token "${2}"

main.py Normal file

@@ -0,0 +1,94 @@
import DPO
import ORPO
import time
import logging
import inference
import argparse
from pathlib import Path
def log_hyperparameters(args):
logging.info("Hyperparameters:")
for arg in vars(args):
logging.info(f"{arg}: {getattr(args, arg)}")
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--exp_name", type=str,
choices=["DPO", "ORPO"])
parser.add_argument("--model_name", type=str,
choices=["unsloth/llama-3-8b-bnb-4bit",
"unsloth/mistral-7b-v0.3-bnb-4bit"],
required=True)
parser.add_argument("--train", action="store_true")
parser.add_argument("--inference_base_model", action="store_true")
parser.add_argument("--wandb_token", type=str, required=True)
parser.add_argument("--train_batch_size", type=int, default=2)
parser.add_argument("--eval_batch_size", type=int, default=2)
parser.add_argument("--gradient_accumulation_steps", type=int, default=8)
parser.add_argument("--lr", type=float, default=5e-6)
parser.add_argument("--lr_scheduler_type", type=str,
default="cosine", choices=["cosine", "linear"])
parser.add_argument("--max_steps", type=int, default=0, choices=[500, 1000, 1500])
parser.add_argument("--num_epochs", type=int, choices=[1, 3, 5])
parser.add_argument("--optimizer", type=str, default="paged_adamw_32bit",
choices=["paged_adamw_32bit", "paged_adamw_8bit"])
parser.add_argument("--weight_decay", type=float, default=0)
parser.add_argument("--max_grad_norm", type=float, default=0)
parser.add_argument("--warmup_ratio", type=float, default=0)
parser.add_argument("--beta", type=float, default=0.1)
parser.add_argument("--max_length", type=int, default=1024)
parser.add_argument("--max_prompt_length", type=int, default=512)
parser.add_argument("--seed", type=int, default=2024)
parser.add_argument("--logging_strategy", type=str,
default="steps", choices=["steps", "epoch"])
parser.add_argument("--logging_steps", type=int, default=1)
parser.add_argument("--evaluation_strategy", type=str,
default="steps", choices=["steps", "epoch"])
parser.add_argument("--eval_steps", type=int, default=100)
parser.add_argument("--output_dir", type=str, default="./outputs")
parser.add_argument("--save_strategy", type=str, default="epoch")
parser.add_argument("--report_to", type=str, default="wandb")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
# Create a timestamp
current_time = time.strftime("%Y%m%d-%H%M%S")
print(f"Current time: {current_time}\n")
# Create the output directory path
output_dir = Path(f"{args.output_dir}/{args.exp_name}_{current_time}")
# Create the directory if it doesn't exist
if not output_dir.exists():
output_dir.mkdir(parents=True)
print(f"Created output directory at: {output_dir}\n")
# Set up logging
log_file_name = output_dir / f"{args.exp_name}-{current_time}.log"
logging.basicConfig(filename=log_file_name,
level=logging.INFO, format="%(asctime)s - %(message)s")
log_hyperparameters(args)
if args.train:
if args.exp_name == "DPO":
DPO.DPO_train(args, output_dir)
elif args.exp_name == "ORPO":
ORPO.ORPO_train(args, output_dir)
else:
raise ValueError("Invalid experiment name")
if args.inference_base_model:
if args.model_name == "unsloth/llama-3-8b-bnb-4bit":
print("Inference with base model: unsloth/llama-3-8b-bnb-4bit")
inference.LLM_inference(args)
elif args.model_name == "unsloth/mistral-7b-v0.3-bnb-4bit":
print("Inference with base model: unsloth/mistral-7b-v0.3-bnb-4bit")
inference.LLM_inference(args)
else:
raise ValueError("Invalid model name")

run.sh Normal file

@@ -0,0 +1,8 @@
#!/bin/bash
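# Example usage (illustrative; exp_name and model_name choices are defined in main.py):
#   bash run.sh DPO unsloth/llama-3-8b-bnb-4bit <YOUR_WANDB_TOKEN>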
python main.py \
--exp_name "${1}" \
--model_name "${2}" \
--train \
--wandb_token "${3}" \
--num_epochs 1

test_prompt.json Normal file

@@ -0,0 +1,42 @@
[
{
"id": 1,
"prompt": "How many colors are traditionally recognized in a visible spectrum or optical rainbow?"
},
{
"id": 2,
"prompt": "In a basket, there are 20 oranges, 60 apples, and 40 bananas. If 15 pears were added, and half of the oranges were removed, what would be the new ratio of oranges to apples, bananas, and pears combined within the basket?"
},
{
"id": 3,
"prompt": "If you were a car salesperson, how would you convince a potential buyer to purchase an electric vehicle?"
},
{
"id": 4,
"prompt": "Break down the process of photosynthesis into a bullet-pointed list, detailing each stage and the overall purpose of the process in sustaining life on Earth."
},
{
"id": 5,
"prompt": "Explain why college students should get a library card."
},
{
"id": 6,
"prompt": "What popular toy was invented in 1943 by Richard James, an engineer who accidentally knocked a torsion spring off his workbench?"
},
{
"id": 7,
"prompt": "Which famous movie prop designs have become iconic symbols in film history?"
},
{
"id": 8,
"prompt": "A curve with a 120 m radius on a level road is banked at the correct angle for a speed of 20 m/s. If an automobile rounds this curve at 30 m/s, what is the minimum coefficient of static friction needed between tires and road to prevent skidding?"
},
{
"id": 9,
"prompt": "What can't you see, hear or feel, until its too late. What shadows love, and shopkeepers hate?"
},
{
"id": 10,
"prompt": "In a basket, there are 20 oranges, 60 apples, and 40 bananas. If 15 pears were added, and half of the oranges were removed, what would be the new ratio of oranges to apples, bananas, and pears combined within the basket?"
}
]

utils.py Normal file

@@ -0,0 +1,31 @@
import json
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
def format_prompt(sample, EOS_TOKEN):
instruction = sample["system"]
input = sample["question"]
accepted = sample["chosen"]
rejected = sample["rejected"]
sample["prompt"] = alpaca_prompt.format(instruction, input, "")
sample["chosen"] = accepted + EOS_TOKEN
sample["rejected"] = rejected + EOS_TOKEN
return sample
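# Illustrative example of format_prompt (the field names match Intel/orca_dpo_pairs
# as used above; the concrete values are made up):
#   sample = {"system": "You are a helpful assistant.",
#             "question": "What is 2 + 2?",
#             "chosen": "4",
#             "rejected": "5"}
#   out = format_prompt(sample, EOS_TOKEN="</s>")
#   # out["prompt"] is the Alpaca-style prompt with an empty Response section,
#   # out["chosen"] == "4</s>", out["rejected"] == "5</s>"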
def write_json(output_data, output_file):
with open(output_file, 'w') as f:
json.dump(output_data, f, indent=4)
YOUR_CODE_HERE = "*** YOUR CODE HERE ***"