forked from PaddlePaddle/PaddleNLP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
64 lines (60 loc) Β· 1.99 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
class StackModel(nn.Layer):
def __init__(self, model):
super().__init__()
self.model = model
def forward(
self,
src_ids,
src_sids,
src_pids,
tgt_ids,
tgt_sids,
tgt_pids,
attn_ids,
mask_src_2_src,
mask_tgt_2_srctgt,
mask_attn_2_srctgtattn,
tgt_labels,
tgt_pos,
):
_, __, info = self.model(
src_ids, sent_ids=src_sids, pos_ids=src_pids, attn_bias=mask_src_2_src, encode_only=True
)
cached_k, cached_v = info["caches"]
_, __, info = self.model(
tgt_ids,
sent_ids=tgt_sids,
pos_ids=tgt_pids,
attn_bias=mask_tgt_2_srctgt,
past_cache=(cached_k, cached_v),
encode_only=True,
)
cached_k2, cached_v2 = info["caches"]
past_cache_k = [paddle.concat([k, k2], 1) for k, k2 in zip(cached_k, cached_k2)]
past_cache_v = [paddle.concat([v, v2], 1) for v, v2 in zip(cached_v, cached_v2)]
loss, _, __ = self.model(
attn_ids,
sent_ids=tgt_sids,
pos_ids=tgt_pids,
attn_bias=mask_attn_2_srctgtattn,
past_cache=(past_cache_k, past_cache_v),
tgt_labels=tgt_labels,
tgt_pos=tgt_pos,
)
loss = loss.mean()
return loss