
Commit b1428e6

add Custom NPU support for several models (PaddlePaddle#4956)
* add Custom NPU support for several models
* modify format
* revert pretrain model of ernie-matching
1 parent d2c8174 commit b1428e6

33 files changed: +213 -177 lines


examples/information_extraction/msra_ner/README.md

+1-1
@@ -45,7 +45,7 @@ python -u ./train.py \
 - `logging_steps`: the logging interval, in steps.
 - `save_steps`: the interval, in steps, between model saves and evaluations.
 - `output_dir`: the directory where the model is saved.
-- `device`: the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU.
+- `device`: the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU, 'npu' uses a Huawei Ascend card.
 
 #### Multi-card training
 
 ```shell
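
Across the scripts touched by this commit, the `device` flag documented above follows one pattern: an argparse choice list (now including `npu`) whose value is passed to `paddle.set_device` before any model or data is created. A minimal sketch of that pattern, with illustrative names rather than code copied from the repository:

```python
import argparse

import paddle


def parse_args():
    parser = argparse.ArgumentParser()
    # "npu" is the choice this commit adds; it targets the Huawei Ascend
    # CustomDevice backend when that plugin is installed.
    parser.add_argument(
        "--device",
        default="gpu",
        type=str,
        choices=["cpu", "gpu", "xpu", "npu"],
        help="Device to run on: cpu, gpu, xpu or npu.",
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    # Pin the global device before building the model, optimizer, or data loaders.
    paddle.set_device(args.device)
    print("running on", paddle.device.get_device())
```

Running the sketch with `--device npu` only works when the Ascend CustomDevice package for Paddle is installed; otherwise `paddle.set_device` is expected to fail.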

examples/information_extraction/msra_ner/predict.py

+3-11
@@ -13,18 +13,11 @@
 # limitations under the License.
 
 import argparse
-import os
-import ast
-import random
-import time
-import math
-from functools import partial
-
-import numpy as np
+
 import paddle
+from datasets import load_dataset
 from paddle.io import DataLoader
 
-from datasets import load_dataset
 from paddlenlp.data import DataCollatorForTokenClassification
 from paddlenlp.transformers import BertForTokenClassification, BertTokenizer
 
@@ -35,7 +28,7 @@
 parser.add_argument("--init_checkpoint_path", default=None, type=str, required=True, help="The model checkpoint path.", )
 parser.add_argument("--max_seq_length", default=128, type=int, help="The maximum total input sequence length after tokenization. Sequences longer " "than this will be truncated, sequences shorter will be padded.", )
 parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.", )
-parser.add_argument("--device", default="gpu", type=str, choices=["cpu", "gpu", "xpu"] ,help="The device to select to train the model, is must be cpu/gpu/xpu.")
+parser.add_argument("--device", default="gpu", type=str, choices=["cpu", "gpu", "xpu", "npu"] , help="The device to select to train the model, is must be cpu/gpu/xpu/npu.")
 # yapf: enable
 
 
@@ -100,7 +93,6 @@ def tokenize_and_align_labels(examples):
         tokenized_inputs["labels"] = labels
         return tokenized_inputs
 
-    ignore_label = -100
     batchify_fn = DataCollatorForTokenClassification(tokenizer)
 
     id2label = dict(enumerate(label_list))

examples/information_extraction/msra_ner/train.py

+14-14
@@ -14,22 +14,23 @@
 
 import argparse
 import os
-import random
 import time
-import math
-from functools import partial
 
-import numpy as np
 import paddle
+from datasets import load_dataset
 from paddle.io import DataLoader
 
-from paddlenlp.transformers import LinearDecayWithWarmup
-from paddlenlp.metrics import ChunkEvaluator
-from datasets import load_dataset
-from paddlenlp.transformers import BertForTokenClassification, BertTokenizer
-from paddlenlp.transformers import ErnieForTokenClassification, ErnieTokenizer
-from paddlenlp.transformers import ErnieCtmForTokenClassification, ErnieCtmTokenizer
 from paddlenlp.data import DataCollatorForTokenClassification
+from paddlenlp.metrics import ChunkEvaluator
+from paddlenlp.transformers import (
+    BertForTokenClassification,
+    BertTokenizer,
+    ErnieCtmForTokenClassification,
+    ErnieCtmTokenizer,
+    ErnieForTokenClassification,
+    ErnieTokenizer,
+    LinearDecayWithWarmup,
+)
 from paddlenlp.utils.log import logger
 
 MODEL_CLASSES = {
@@ -42,8 +43,8 @@
 
 # yapf: disable
 parser.add_argument("--model_type", default="bert", type=str, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()), )
-parser.add_argument("--model_name_or_path", default=None, type=str, required=True, help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join( sum([ list(classes[-1].pretrained_init_configuration.keys()) for classes in MODEL_CLASSES.values() ], [])), )
-parser.add_argument("--dataset", default="msra_ner", type=str, choices=["msra_ner", "peoples_daily_ner"] ,help="The named entity recognition datasets.")
+parser.add_argument("--model_name_or_path", default=None, type=str, required=True, help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(sum([list(classes[-1].pretrained_init_configuration.keys()) for classes in MODEL_CLASSES.values()], [])), )
+parser.add_argument("--dataset", default="msra_ner", type=str, choices=["msra_ner", "peoples_daily_ner"] , help="The named entity recognition datasets.")
 parser.add_argument("--output_dir", default=None, type=str, required=True, help="The output directory where the model predictions and checkpoints will be written.")
 parser.add_argument("--max_seq_length", default=128, type=int, help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.")
 parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
@@ -57,7 +58,7 @@
 parser.add_argument("--logging_steps", type=int, default=1, help="Log every X updates steps.")
 parser.add_argument("--save_steps", type=int, default=100, help="Save checkpoint every X updates steps.")
 parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
-parser.add_argument("--device", default="gpu", type=str, choices=["cpu", "gpu", "xpu"] ,help="The device to select to train the model, is must be cpu/gpu/xpu.")
+parser.add_argument("--device", default="gpu", type=str, choices=["cpu", "gpu", "xpu", "npu"] , help="The device to select to train the model, is must be cpu/gpu/xpu/npu.")
 # yapf: enable
 
 
@@ -179,7 +180,6 @@ def tokenize_and_align_labels(examples):
     metric = ChunkEvaluator(label_list=label_list)
 
     global_step = 0
-    last_step = args.num_train_epochs * len(train_data_loader)
     tic_train = time.time()
     for epoch in range(args.num_train_epochs):
         for step, batch in enumerate(train_data_loader):
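
The regrouped `paddlenlp.transformers` imports above feed the `MODEL_CLASSES` registry whose opening line appears as context: `--model_type` picks a (model class, tokenizer class) pair, and the `--model_name_or_path` help text is assembled from each tokenizer's `pretrained_init_configuration`. A rough sketch of that registry pattern; the entries below are illustrative and may not match the script exactly:

```python
from paddlenlp.transformers import (
    BertForTokenClassification,
    BertTokenizer,
    ErnieForTokenClassification,
    ErnieTokenizer,
)

# --model_type selects one entry; by convention the last element is the tokenizer.
MODEL_CLASSES = {
    "bert": (BertForTokenClassification, BertTokenizer),
    "ernie": (ErnieForTokenClassification, ErnieTokenizer),
}

# Mirrors how the --model_name_or_path help string is built in the parser above:
# every pretrained shortcut name known to each tokenizer class, flattened into one list.
shortcut_names = sum(
    [list(classes[-1].pretrained_init_configuration.keys()) for classes in MODEL_CLASSES.values()],
    [],
)
print(", ".join(shortcut_names))
```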

examples/language_model/bigbird/README.md

+1-1
@@ -127,7 +127,7 @@ python -m paddle.distributed.launch --gpus "0" run_glue.py \
 - `logging_steps` is the logging interval, in steps.
 - `save_steps` is the interval, in steps, between model saves and evaluations.
 - `output_dir` is the directory where the model is saved.
-- `device` is the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU.
+- `device` is the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU, 'npu' uses a Huawei Ascend card.
 
 After fine-tuning `bigbird-base-uncased` on the GLUE tasks, the results on the validation sets are as follows:
 
examples/language_model/bigbird/args.py

+2-2
@@ -69,8 +69,8 @@ def parse_args():
         "--device",
         type=str,
         default="gpu",
-        choices=["cpu", "gpu"],
-        help="Select cpu, gpu, xpu devices to train model.",
+        choices=["cpu", "gpu", "npu"],
+        help="Select cpu, gpu, xpu, npu devices to train model.",
     )
 
     parser.add_argument("--epochs", type=int, default=10, help="Number of epoches for training.")

examples/language_model/bigbird/run_glue.py

+16-15
@@ -12,32 +12,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import argparse
-import logging
 import os
-import sys
 import random
 import time
-import math
-import distutils.util
 from functools import partial
 
+import args
 import numpy as np
 import paddle
 from paddle.io import DataLoader
-from paddle.metric import Metric, Accuracy, Precision, Recall
+from paddle.metric import Accuracy
 
+from paddlenlp.data import Stack
 from paddlenlp.datasets import load_dataset
-from paddlenlp.data import Stack, Tuple, Pad, Dict
-from paddlenlp.data.sampler import SamplerHelper
-from paddlenlp.transformers import BigBirdModel, BigBirdForSequenceClassification, BigBirdTokenizer
-from paddlenlp.transformers import create_bigbird_rand_mask_idx_list
-from paddlenlp.transformers import LinearDecayWithWarmup
 from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
+from paddlenlp.transformers import (
+    BigBirdForSequenceClassification,
+    BigBirdTokenizer,
+    LinearDecayWithWarmup,
+    create_bigbird_rand_mask_idx_list,
+)
 from paddlenlp.utils.log import logger
 
-import args
-
 METRIC_CLASSES = {
     "cola": Mcc,
     "sst-2": Accuracy,
@@ -190,7 +186,7 @@ def do_train(args):
     train_ds = load_dataset("glue", args.task_name, splits="train")
     tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
 
-    num_classes = 1 if train_ds.label_list == None else len(train_ds.label_list)
+    num_classes = 1 if train_ds.label_list is None else len(train_ds.label_list)
     # In finetune task, bigbird performs better when setting dropout to zero.
     model = model_class.from_pretrained(
         args.model_name_or_path, num_classes=num_classes, attn_dropout=0.0, hidden_dropout_prob=0.0
@@ -327,5 +323,10 @@ def print_arguments(args):
 if __name__ == "__main__":
     args = args.parse_args()
     print_arguments(args)
-    assert args.device in ["cpu", "gpu", "xpu"], "Invalid device! Available device should be cpu, gpu, or xpu."
+    assert args.device in [
+        "cpu",
+        "gpu",
+        "xpu",
+        "npu",
+    ], "Invalid device! Available device should be cpu, gpu, xpu or npu."
     do_train(args)

examples/language_model/convbert/README.md

+1-1
@@ -55,7 +55,7 @@ python -u examples/language_model/convbert/run_glue.py \
 - `logging_steps` is the logging interval, in steps.
 - `save_steps` is the interval, in steps, between model saves and evaluations.
 - `output_dir` is the directory where the model is saved.
-- `device` is the device type to use. Defaults to GPU; it can be set to CPU, GPU, or XPU. For multi-GPU training, set it to GPU and list the GPU ids to use in the CUDA_VISIBLE_DEVICES environment variable.
+- `device` is the device type to use. Defaults to GPU; it can be set to CPU, GPU, XPU, or NPU. For multi-GPU training, set it to GPU and list the GPU ids to use in the CUDA_VISIBLE_DEVICES environment variable.
 
 Fine-tuning prints logs in the following format, at the intervals set by `logging_steps` and `save_steps`:
 
examples/language_model/convbert/run_glue.py

+15-9
@@ -24,14 +24,20 @@
 from paddle.io import DataLoader
 from paddle.metric import Accuracy
 
+from paddlenlp.data import Pad, Stack, Tuple
 from paddlenlp.datasets import load_dataset
-from paddlenlp.data import Stack, Tuple, Pad
-from paddlenlp.transformers import BertForSequenceClassification, BertTokenizer
-from paddlenlp.transformers import ElectraForSequenceClassification, ElectraTokenizer
-from paddlenlp.transformers import ErnieForSequenceClassification, ErnieTokenizer
-from paddlenlp.transformers import ConvBertForSequenceClassification, ConvBertTokenizer
-from paddlenlp.transformers import LinearDecayWithWarmup
 from paddlenlp.metrics import AccuracyAndF1, Mcc, PearsonAndSpearman
+from paddlenlp.transformers import (
+    BertForSequenceClassification,
+    BertTokenizer,
+    ConvBertForSequenceClassification,
+    ConvBertTokenizer,
+    ElectraForSequenceClassification,
+    ElectraTokenizer,
+    ErnieForSequenceClassification,
+    ErnieTokenizer,
+    LinearDecayWithWarmup,
+)
 
 FORMAT = "%(asctime)s-%(levelname)s: %(message)s"
 logging.basicConfig(level=logging.INFO, format=FORMAT)
@@ -135,8 +141,8 @@ def parse_args():
         "--device",
         default="gpu",
         type=str,
-        choices=["cpu", "gpu"],
-        help="The device to select to train the model, is must be cpu/gpu.",
+        choices=["cpu", "gpu", "npu"],
+        help="The device to select to train the model, is must be cpu/gpu/npu.",
     )
     args = parser.parse_args()
     return args
@@ -270,7 +276,7 @@ def do_train(args):
         dataset=dev_ds, batch_sampler=dev_batch_sampler, collate_fn=batchify_fn, num_workers=0, return_list=True
     )
 
-    num_classes = 1 if train_ds.label_list == None else len(train_ds.label_list)
+    num_classes = 1 if train_ds.label_list is None else len(train_ds.label_list)
     model = model_class.from_pretrained(args.model_name_or_path, num_classes=num_classes)
     if paddle.distributed.get_world_size() > 1:
         model = paddle.DataParallel(model)
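
Alongside the new `npu` choice, the commit replaces `== None` with the idiomatic `is None` in the `num_classes` line above. The expression exists because some GLUE tasks are regression tasks (STS-B, for example) where `label_list` is `None` and the model needs a single output, while classification tasks need one output per label. A standalone illustration of the same expression, using hypothetical values instead of a real dataset:

```python
def num_output_classes(label_list):
    # "is None" is an identity check and the idiomatic way to test for None;
    # "== None" calls __eq__ and can behave oddly for objects that override it.
    return 1 if label_list is None else len(label_list)


print(num_output_classes(None))        # 1 -> single-output regression head
print(num_output_classes(["0", "1"]))  # 2 -> binary classification head
```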

examples/language_model/roformer/README.md

+2-2
@@ -55,7 +55,7 @@ python -m paddle.distributed.launch --gpus "0" examples/language_model/roformer/
 - `logging_steps` is the logging interval, in steps.
 - `save_steps` is the interval, in steps, between model saves and evaluations.
 - `output_dir` is the directory where the model is saved.
-- `device` is the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU.
+- `device` is the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU, 'npu' uses a Huawei Ascend card.
 - `use_amp` indicates whether to enable automatic mixed precision training.
 
 After fine-tuning `roformer-chinese-base` on the THUCNews classification task, the results on the validation set are as follows:
@@ -100,7 +100,7 @@ python -m paddle.distributed.launch --gpus "0" examples/language_model/roformer/
 - `logging_steps` is the logging interval, in steps.
 - `save_steps` is the interval, in steps, between model saves and evaluations.
 - `output_dir` is the directory where the model is saved.
-- `device` is the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU.
+- `device` is the device used for training; 'gpu' uses the GPU, 'xpu' uses a Baidu Kunlun card, 'cpu' uses the CPU, 'npu' uses a Huawei Ascend card.
 - `use_amp` indicates whether to enable automatic mixed precision training.
 
 After fine-tuning `roformer-chinese-base` on the Cail2019_Scm task, the results are as follows:

examples/language_model/roformer/run_cail2019_scm.py

+2-2
@@ -148,8 +148,8 @@ def parse_args():
         "--device",
         default="gpu",
         type=str,
-        choices=["cpu", "gpu", "xpu"],
-        help="The device to select to train the model, is must be cpu/gpu/xpu.",
+        choices=["cpu", "gpu", "xpu", "npu"],
+        help="The device to select to train the model, is must be cpu/gpu/xpu/npu.",
     )
     parser.add_argument(
         "--use_amp",

examples/language_model/roformer/run_thucnews.py

+2-2
@@ -114,8 +114,8 @@ def parse_args():
         "--device",
         default="gpu",
         type=str,
-        choices=["cpu", "gpu", "xpu"],
-        help="The device to select to train the model, is must be cpu/gpu/xpu.",
+        choices=["cpu", "gpu", "xpu", "npu"],
+        help="The device to select to train the model, is must be cpu/gpu/xpu/npu.",
     )
     parser.add_argument("--use_amp", type=strtobool, default=False, help="Enable mixed precision training.")
     parser.add_argument("--scale_loss", type=float, default=2**15, help="The value of scale_loss for fp16.")

examples/language_model/t5/README.md

+3-1
@@ -31,7 +31,8 @@ python run_glue.py \
     --save_steps 100 \
     --seed 42 \
     --scheduler_type linear \
-    --output_dir outputs/rte/
+    --output_dir outputs/rte/ \
+    --device gpu
 ```
 
 The parameters are described as follows:
@@ -49,6 +50,7 @@ python run_glue.py \
 - `seed` is the random seed.
 - `scheduler_type` is the LR scheduler type; linear and cosine are supported, defaulting to linear.
 - `output_dir` is the directory where the model is saved.
+- `device` is the device used for training; cpu, gpu, or npu can be selected.
 
 Fine-tuning with the Trainer:
 
examples/language_model/t5/run_glue.py

+16-4
@@ -12,20 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import argparse
 import logging
 import math
 import os
 
 import paddle
+from data import (
+    GLUE_PROCESSED,
+    get_dev_dataloader,
+    get_mnli_dev_dataloader,
+    get_train_dataloader,
+)
 from paddle.amp import GradScaler, auto_cast
 from paddle.optimizer import AdamW
-from paddlenlp.transformers import T5ForConditionalGeneration, T5Tokenizer
 from tqdm import tqdm
-
-from data import get_dev_dataloader, get_train_dataloader, get_mnli_dev_dataloader, GLUE_PROCESSED
 from utils import GLUE_METRICS, get_scheduler, get_writer, set_seed
 
-import argparse
+from paddlenlp.transformers import T5ForConditionalGeneration, T5Tokenizer
 
 
 def parse_args():
@@ -139,6 +143,13 @@ def parse_args():
         help="num_workers.",
     )
     parser.add_argument("--is_test", action="store_true", help="is_test.")
+    parser.add_argument(
+        "--device",
+        default="gpu",
+        type=str,
+        choices=["gpu", "cpu", "npu"],
+        help="The device to select to train the model, is must be cpu/gpu/npu.",
+    )
     args = parser.parse_args()
     args.task_name = args.task_name.lower()
     args.logdir = os.path.join(args.output_dir, "logs")
@@ -197,6 +208,7 @@ def evaluate(model, data_loader, tokenizer, label2id, metric_list, generate_max_
 
 
 def main(args):
+    paddle.set_device(args.device)
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
         datefmt="%m/%d/%Y %H:%M:%S",
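
The last hunk is the one functional change in this file: `main` now calls `paddle.set_device(args.device)` before anything else, so the model, optimizer, and data loaders built later inherit the selected device. A minimal sketch of that ordering, with placeholder layers rather than the script's T5 setup:

```python
import paddle


def main(device="gpu"):
    # Select the device first; tensors and layers created afterwards follow it.
    paddle.set_device(device)

    model = paddle.nn.Linear(8, 8)  # placeholder for T5ForConditionalGeneration
    optimizer = paddle.optimizer.AdamW(parameters=model.parameters())

    x = paddle.ones([2, 8])  # allocated on the selected device
    print(paddle.device.get_device(), x.place)
    return model, optimizer


if __name__ == "__main__":
    main(device="cpu")  # pass "gpu" or "npu" when that backend is available
```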

examples/machine_reading_comprehension/DuReader-robust/args.py

+18-1
@@ -1,3 +1,17 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import argparse
 
 
@@ -47,7 +61,10 @@ def parse_args():
     parser.add_argument("--save_steps", type=int, default=500, help="Save checkpoint every X updates steps.")
     parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
     parser.add_argument(
-        "--device", choices=["cpu", "gpu"], default="gpu", help="Select which device to train model, defaults to gpu."
+        "--device",
+        choices=["cpu", "gpu", "npu"],
+        default="gpu",
+        help="Select which device to train model, defaults to gpu.",
     )
     parser.add_argument(
         "--doc_stride",
