import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from activation_steering import SteeringDataset, SteeringVector
# Quickstart script: extract a "refusal" steering vector from
# Hermes-2-Pro-Llama-3-8B and save it to disk.

MODEL_NAME = "NousResearch/Hermes-2-Pro-Llama-3-8B"

# 1. Load model and tokenizer.
# float16 has a narrow exponent range (largest finite value ~65504), so large
# intermediate activations can overflow to inf and surface as NaN in the
# hidden states. bfloat16 keeps float32's exponent range and avoids that
# overflow.
# NOTE(review): requires bfloat16-capable hardware; fall back to
# torch.float32 if it is unavailable.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# 2. Load the demo data (paths are relative to the current working directory).
with open("docs/demo-data/alpaca.json", "r", encoding="utf-8") as file:
    alpaca_data = json.load(file)

with open("docs/demo-data/behavior_refusal.json", "r", encoding="utf-8") as file:
    refusal_data = json.load(file)

questions = alpaca_data["train"]
refusal = refusal_data["non_compliant_responses"]
compliant = refusal_data["compliant_responses"]

# 3. Build the contrastive dataset.
# Each example pairs a question with itself; the contrast between the two
# sides comes from the (refusing, complying) suffix pairs.
# NOTE(review): how SteeringDataset combines examples with suffixes is not
# visible here; confirm against the library.
refusal_behavior_dataset = SteeringDataset(
    tokenizer=tokenizer,
    examples=[(item["question"], item["question"]) for item in questions[:50]],
    suffixes=list(zip(refusal[:5], compliant[:5])),
)

# 4. Train the steering vector and save it.
refusal_behavior_vector = SteeringVector.train(
    model=model,
    tokenizer=tokenizer,
    steering_dataset=refusal_behavior_dataset,
    method="pca_center",
    accumulate_last_x_tokens="suffix-only",
    save_analysis=True,
)

refusal_behavior_vector.save("refusal_behavior_vector")
When I follow quickstart.md, some tensor values in the hidden states (`out`) inside `batched_get_hiddens()` are NaN, which disrupts downstream processing. Any suggestions?
When I follow quickstart.md, some tensor values in the hidden states (`out`) inside `batched_get_hiddens()` are NaN, which disrupts downstream processing. Any suggestions?