-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_diffucoder.py
More file actions
82 lines (65 loc) · 2.37 KB
/
test_diffucoder.py
File metadata and controls
82 lines (65 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import torch
from transformers import AutoModel, AutoTokenizer
from longdllm import adapt_for_long_context
import logging
import re
# logging.basicConfig(level=logging.INFO)

# Passkey-retrieval smoke test for a LongDLLM-adapted DiffuCoder model:
# load the model, extend its context window to 128K tokens, feed it a long
# document containing a hidden numeric passkey, and check that diffusion
# generation recovers the passkey.

# Define the checkpoint path once and reuse it for both the model and the
# tokenizer so the two from_pretrained calls can never drift out of sync.
model_path = "apple/DiffuCoder-7B-Instruct"
model = AutoModel.from_pretrained(
    model_path,
    dtype=torch.float16,  # NOTE(review): older transformers versions spell this `torch_dtype` — confirm installed version
    attn_implementation="flash_attention_2",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Patch the model for long-context inference (131072 = 128K tokens),
# then move it to the GPU in eval mode.
model = adapt_for_long_context(model, target_length=131072)
model = model.to("cuda").eval()

# The query file embeds a numeric passkey somewhere inside long distractor
# text; the first run of digits is treated as the ground-truth answer.
with open("./passkey-128k-idx-4.txt", "r") as f:
    query = f.read().strip()
passkey_match = re.search(r'\d+', query)
if passkey_match is None:
    # Fail fast with a clear message instead of an AttributeError on .group(0).
    raise ValueError("No numeric passkey found in ./passkey-128k-idx-4.txt")
passkey_phrase = passkey_match.group(0)

prompt = f"""<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
{query.strip()}
<|im_end|>
<|im_start|>assistant
""" ## following the template of qwen; you can also use apply_chat_template function

TOKEN_PER_STEP = 1 # diffusion timesteps * TOKEN_PER_STEP = total new tokens

inputs = tokenizer(prompt, return_tensors="pt")
print("\n" + "="*60)
print("LONGDLLM DIFFUCODER TEST - PASSKEY RETRIEVAL TASK")
print("="*60)
print(f"📄 Input length: {len(inputs.input_ids[0])} tokens")
print(f"🔑 Hidden passkey (ground truth): {passkey_phrase}")
first_sentence = query.split('\n')[0] + '.' if '\n' in query else query[:100] + '...'
print(f"\n first line of query: \"{first_sentence}\"")
input_ids = inputs.input_ids.to(device="cuda")
attention_mask = inputs.attention_mask.to(device="cuda")
print("\n🚀 Running LongDLLM-adapted model with diffusion generation...")

# Use the adapted model with long sequences. steps * TOKEN_PER_STEP must
# cover max_new_tokens (here 10 * 1 = 10).
output = model.diffusion_generate(
    input_ids,
    attention_mask=attention_mask,
    max_new_tokens=10,
    output_history=True,
    return_dict_in_generate=True,
    steps=10,
    temperature=0.3,
    top_p=0.95,
    alg="entropy",
    alg_temp=0.,
)

# Strip the prompt tokens from each generated sequence, then cut the
# first completion at the diffusion pad marker.
generations = [
    tokenizer.decode(g[len(p) :].tolist())
    for p, g in zip(input_ids, output.sequences)
]
response = generations[0].split('<|dlm_pad|>')[0]

print("\n" + "="*60)
print("RESULTS")
print("="*60)
print(f"🤖 Model's raw output: '{response.strip()}'")
answer_match = re.search(r'\d+', response)
if answer_match is None:
    # Guard: a digit-free response previously crashed with AttributeError,
    # losing the diagnostic output above.
    print(f"❌ Failed: no digits found in model output; expected {passkey_phrase}")
else:
    answer = answer_match.group(0)
    print(f"🔍 Extracted answer: {answer}")
    if answer == passkey_phrase:
        print(f"✅ Success: True")
    else:
        print(f"❌ Failed: Expected {passkey_phrase}, got {answer}")
print("="*60)