GPTModel:
GPTModel(
  (tok_emb): Embedding(65, 48)
  (pos_emb): Embedding(128, 48)
  (drop_emb): Dropout(p=0.1, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=48, out_features=48, bias=False)
        (W_key): Linear(in_features=48, out_features=48, bias=False)
        (W_value): Linear(in_features=48, out_features=48, bias=False)
        (out_proj): Linear(in_features=48, out_features=48, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=48, out_features=192, bias=True)
          (1): GELU()
          (2): Linear(in_features=192, out_features=48, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=48, out_features=48, bias=False)
        (W_key): Linear(in_features=48, out_features=48, bias=False)
        (W_value): Linear(in_features=48, out_features=48, bias=False)
        (out_proj): Linear(in_features=48, out_features=48, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=48, out_features=192, bias=True)
          (1): GELU()
          (2): Linear(in_features=192, out_features=48, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
    (2): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=48, out_features=48, bias=False)
        (W_key): Linear(in_features=48, out_features=48, bias=False)
        (W_value): Linear(in_features=48, out_features=48, bias=False)
        (out_proj): Linear(in_features=48, out_features=48, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=48, out_features=192, bias=True)
          (1): GELU()
          (2): Linear(in_features=192, out_features=48, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.1, inplace=False)
    )
  )
  (final_norm): LayerNorm()
  (out_head): Linear(in_features=48, out_features=65, bias=False)
)
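
For reference, below is a minimal sketch of a configuration that would produce the printout above. It assumes a GPTModel class in the style of Sebastian Raschka's "Build a Large Language Model (From Scratch)" codebase, whose module names (tok_emb, trf_blocks, drop_shortcut, and a parameter-free-looking LayerNorm() repr) match this summary; the import path gpt is hypothetical. The number of attention heads is not visible in the repr, so n_heads=6 is an assumption (any divisor of 48 would print identically). A vocabulary of 65 tokens suggests a character-level tokenizer, as in tiny-Shakespeare-style setups, but that is also an inference, not something the printout confirms.

    import torch
    from gpt import GPTModel  # hypothetical import; point this at wherever the class lives

    GPT_CONFIG = {
        "vocab_size": 65,       # matches Embedding(65, 48) and the 65-way out_head
        "context_length": 128,  # matches pos_emb: Embedding(128, 48)
        "emb_dim": 48,          # hidden size used throughout
        "n_heads": 6,           # assumption: head count is not shown in the repr
        "n_layers": 3,          # three TransformerBlocks (0, 1, 2)
        "drop_rate": 0.1,       # matches the Dropout(p=0.1) modules
        "qkv_bias": False,      # matches bias=False on W_query / W_key / W_value
    }

    torch.manual_seed(123)
    model = GPTModel(GPT_CONFIG)
    print(model)  # should reproduce the summary above

    # Rough size check: total trainable parameters
    print(sum(p.numel() for p in model.parameters()))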