Commit 4440538

Fix typos (PaddlePaddle#4959)
1 parent f354fe6 · commit 4440538

23 files changed (+50 −50 lines)

docs/get_started/installation.rst (+1 −1)

@@ -49,7 +49,7 @@ Anaconda是一个开源的Python发行版本,其包含了conda、Python等180
 
 按如上方式配置后,即可在环境中使用PaddleNLP了,命令行输入python回车后,import paddlenlp试试吧,之后再次使用都可以通过打开'所有程序->Anaconda3/2(64-bit)->Anaconda Prompt',再执行conda activate my_paddlenlp进入环境后,即可再次使用PaddleNLP。
 
-2、Linux/Mac安装安装Anaconda
+2、Linux/Mac安装Anaconda
 >>>>>>>>>
 
 第一步 下载

docs/locale/en/LC_MESSAGES/get_started/installation.po (+1 −1)

@@ -83,7 +83,7 @@ msgid ""
 msgstr ""
 
 #: ../get_started/installation.rst:53
-msgid "2、Linux/Mac安装安装Anaconda"
+msgid "2、Linux/Mac安装Anaconda"
 msgstr ""
 
 #: ../get_started/installation.rst:57

docs/model_zoo/embeddings.md (+1 −1)

@@ -23,7 +23,7 @@
 
 ## 介绍
 
-PaddleNLP提供多个开源的预训练词向量模型,用户仅需在使用`paddlenlp.embeddings.TokenEmbedding`时,指定预训练模型的名称,即可加载相对应的预训练模型。以下将介绍`TokenEmbeddign`详细用法,并列出PaddleNLP所支持的预训练Embedding模型。
+PaddleNLP提供多个开源的预训练词向量模型,用户仅需在使用`paddlenlp.embeddings.TokenEmbedding`时,指定预训练模型的名称,即可加载相对应的预训练模型。以下将介绍`TokenEmbedding`详细用法,并列出PaddleNLP所支持的预训练Embedding模型。
 
 ## 用法
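
For context on the corrected identifier: `TokenEmbedding` loads a pretrained embedding table by name. A minimal usage sketch (the embedding name below is assumed to be one of the zoo models this doc lists):

    from paddlenlp.embeddings import TokenEmbedding

    # Load pretrained word vectors by model name (name assumed from the embedding zoo).
    token_embedding = TokenEmbedding(embedding_name="w2v.baidu_encyclopedia.target.word-word.dim300")
    # Look up the vectors for a list of tokens; returns a numpy array.
    vectors = token_embedding.search(["中国", "人民"])
    print(vectors.shape)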

examples/text_to_sql/IGSQL/README.md (+1 −1)

@@ -80,7 +80,7 @@ python run.py --raw_train_filename="data/sparc_data_removefrom/train.pkl" \
 * embedding_filename: GLOVE 词向量文件路径。
 * data_directory: 预处理得到的文件夹路径。
 * logdir: 输出日志文件夹路径。
-* train,evaluate: 是否执行trian,evaluate。
+* train,evaluate: 是否执行train,evaluate。
 
 
 ### 训练阶段的输出日志

examples/text_to_sql/RAT-SQL/evaluation/README.md (+1 −1)

@@ -4,7 +4,7 @@
 # 评估
 输入文件格式:
 1. 文件以.sql结尾
-2. 文件每行的格式:"qid\tsql_query\tdb_id",其中predcit文件db_id是可选字段,gold文件db_id是必选字段
+2. 文件每行的格式:"qid\tsql_query\tdb_id",其中predict文件db_id是可选字段,gold文件db_id是必选字段
 3. 评估指标:exact matching score
 
 # 使用

examples/text_to_sql/RAT-SQL/text2sql/dataproc/ernie_input_encoder_v2.py (+1 −1)

@@ -183,7 +183,7 @@ def tokenize(self, question, db, column_match_cells=None, candi_nums=None, candi
         if match_cells is not None and len(match_cells) > 0:
             if column.dtype in ("text", "time"):
                 if not self.config.predict_value:
-                    match_cells = match_cells[:1]  # the first cell used to complement senmantics
+                    match_cells = match_cells[:1]  # the first cell used to complement semantics
                 for mcell in match_cells:
                     value_list.append(mcell)
                     toks = [self.special_token_dict["value"]] + self.tokenizer.tokenize(mcell)

examples/text_to_sql/RAT-SQL/text2sql/dataproc/sql_preproc_v2.py (+1 −1)

@@ -267,7 +267,7 @@ def add_item(self, section, sql_json, value_list):
         self.format_sql_value(sql_json, value_dict)
 
         parsed = self.grammar.parse(sql_json, section)
-        self.ast_wrapper.verify_ast(parsed)  # will raise AssertionError, if varify failed
+        self.ast_wrapper.verify_ast(parsed)  # will raise AssertionError, if verify failed
 
         root = parsed
         if section == "train":

examples/text_to_sql/RAT-SQL/text2sql/io.py (+3 −3)

@@ -32,12 +32,12 @@ def init_ernie_model(model_class, model_dir):
     return ernie, config["hidden_size"]
 
 
-def save(model, optimzer, save_path):
+def save(model, optimizer, save_path):
     try:
         paddle.save(model.state_dict(), save_path + ".pdparams")
-        paddle.save(optimzer.state_dict(), save_path + ".pdopt")
+        paddle.save(optimizer.state_dict(), save_path + ".pdopt")
     except Exception as e:
-        logging.error("save model and optimzer failed. save path: %s", save_path)
+        logging.error("save model and optimizer failed. save path: %s", save_path)
         logging.error(traceback.format_exc())
 
 
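The renamed `save` helper writes model parameters and optimizer state as two files per checkpoint. A counterpart restore sketch (the `load` helper itself is hypothetical; `paddle.load` and `set_state_dict` are the real APIs):

    import paddle

    def load(model, optimizer, save_path):
        # Mirror of save(): restore the two state files written above.
        model.set_state_dict(paddle.load(save_path + ".pdparams"))
        optimizer.set_state_dict(paddle.load(save_path + ".pdopt"))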

examples/text_to_sql/RAT-SQL/text2sql/models/sql_beam_search.py (+2 −2)

@@ -31,7 +31,7 @@ class Hypothesis4Filtering(Hypothesis):
 
 def beam_search_with_heuristics(model, inputs, beam_size, max_steps, from_cond=True):
     """
-    Find the valid FROM clasue with beam search
+    Find the valid FROM clause with beam search
     """
     orig_inputs = inputs["orig_inputs"][0]
     # inference_state, next_choices = model.inference(inputs, orig_inputs.db)
@@ -93,7 +93,7 @@ def beam_search_with_heuristics(model, inputs, beam_size, max_steps, from_cond=T
     prefixes2fill_from.sort(key=operator.attrgetter("score"), reverse=True)
     # assert len(prefixes) == beam_size
 
-    # emuerating
+    # enumerating
     beam_from = prefixes2fill_from
     max_size = 6
     unfiltered_finished = []

examples/text_to_sql/RAT-SQL/text2sql/optim.py (+2 −2)

@@ -21,7 +21,7 @@
 
 import paddle
 
-param_name_to_exclue_from_weight_decay = re.compile(r".*layer_norm_scale|.*layer_norm_bias|.*b_0")
+param_name_to_exclude_from_weight_decay = re.compile(r".*layer_norm_scale|.*layer_norm_bias|.*b_0")
 
 
 def get_warmup_and_linear_decay(max_steps, warmup_steps):
@@ -43,7 +43,7 @@ def init_optimizer(model, config, train_steps, scale_params_lr=None):
         lr_scheduler,
         parameters=model.parameters(),
         weight_decay=config.weight_decay,
-        apply_decay_param_fun=lambda n: not param_name_to_exclue_from_weight_decay.match(n),
+        apply_decay_param_fun=lambda n: not param_name_to_exclude_from_weight_decay.match(n),
         grad_clip=paddle.nn.ClipGradByGlobalNorm(config.grad_clip),
     )
     return optimizer
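For context, the renamed regex is passed to `apply_decay_param_fun`, which `paddle.optimizer.AdamW` calls with each parameter name to decide whether weight decay applies. A self-contained sketch of the same pattern (the model and hyperparameters are placeholders):

    import re

    import paddle

    # Skip decay for LayerNorm scales/biases and bias parameters (names ending in b_0).
    no_decay = re.compile(r".*layer_norm_scale|.*layer_norm_bias|.*b_0")

    model = paddle.nn.Linear(8, 2)  # placeholder model
    optimizer = paddle.optimizer.AdamW(
        learning_rate=1e-4,
        parameters=model.parameters(),
        weight_decay=0.01,
        apply_decay_param_fun=lambda name: not no_decay.match(name),
        grad_clip=paddle.nn.ClipGradByGlobalNorm(1.0),
    )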

examples/text_to_sql/RAT-SQL/text2sql/utils/dusql_evaluation.py (+3 −3)

@@ -423,11 +423,11 @@ def parse_from(toks, start_idx, tables_with_alias, schema):
             assert toks[idx] == ")"
             idx += 1
         if idx < len_ and toks[idx] == "a":
-            assert last_table is not None, "last_table should be a table name strin, not None"
+            assert last_table is not None, "last_table should be a table name string, not None"
             tables_with_alias["a"] = last_table
             idx += 2
         elif idx < len_ and toks[idx] == "b":
-            assert last_table is not None, "last_table should be a table name strin, not None"
+            assert last_table is not None, "last_table should be a table name string, not None"
             tables_with_alias["b"] = last_table
             idx += 1
         if idx < len_ and (toks[idx] in CLAUSE_KEYWORDS or toks[idx] in (")", ";")):
@@ -675,7 +675,7 @@ def __eval_from_sql(pred_tables, gold_tables):
         #     return 1
 
     def eval_exact_match(self, pred, gold):
-        """wrapper of evaluate examct match, to process
+        """wrapper of evaluate exact match, to process
         `SQL1 intersect/union SQL2` vs `SQL2 intersect/union SQL1`
         """
        score = self._eval_exact_match(pred, gold)

examples/text_to_sql/RAT-SQL/text2sql/utils/nn_utils.py (+2 −2)

@@ -53,7 +53,7 @@ def lstm_init(num_layers, hidden_size, *batch_sizes):
 
 
 def batch_gather_2d(var, indices):
-    """Gather slices from var in each batch, according to corrensponding
+    """Gather slices from var in each batch, according to corresponding
     index in indices. Currently, it only support 2d Tensor.
 
     Args:
@@ -156,7 +156,7 @@ def pad_sequences_for_3d(seqs, max_col, max_num, dtype=np.int64):
 
 
 def pad_index_sequences(seqs, max_col, max_row, dtype=np.int64):
-    """padding squences for column token indexs"""
+    """padding sequences for column token indexes"""
     padded = []
     for query in seqs:
         new_cols = []
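
The corrected docstring describes a per-batch gather: for each batch row, pick the elements of `var` at the positions given by that row of `indices`. The same semantics in plain numpy (an illustration only, not the paddle implementation):

    import numpy as np

    var = np.array([[10, 11, 12], [20, 21, 22]])
    idx = np.array([[2, 0], [1, 1]])
    # out[b, j] = var[b, idx[b, j]]
    out = np.take_along_axis(var, idx, axis=1)
    print(out)  # [[12 10] [21 21]]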

examples/text_to_sql/RAT-SQL/text2sql/utils/text_utils.py (+1 −1)

@@ -191,7 +191,7 @@ def search_values(cls, question, table):
         all_candidate = []
         for col_id in range(len(table.header)):
             header = table.header[col_id]
-            # 提取col出现在quesiton中的cell
+            # 提取col出现在question中的cell
             # TODO 这里存在一个问题,一个text类型cell必须完全在question中出现才会被当做候选cell
             value_in_column = cls.extract_values_from_column(question, table, col_id, header.type)
             if header.type == "text":

examples/text_to_sql/RAT-SQL/text2sql/utils/utils.py (+3 −3)

@@ -81,10 +81,10 @@ def count_file_lines(filename):
     return cnt
 
 
-def print_tensors(tag="*", **kwrags):
-    """print tensors for debuging"""
+def print_tensors(tag="*", **kwargs):
+    """print tensors for debugging"""
     print(tag * 50)
-    for key, value in kwrags.items():
+    for key, value in kwargs.items():
         print(key, ":", value)
 
 

examples/word_embedding/train.py (+1 −1)

@@ -142,7 +142,7 @@ def forward(self, text, seq_len=None):
 # Loads dataset.
 train_ds, dev_ds = load_dataset("chnsenticorp", splits=["train", "dev"])
 
-# Constructs the newtork.
+# Constructs the network.
 model = BoWModel(
     vocab_size=len(vocab),
     num_classes=len(train_ds.label_list),

paddlenlp/ops/einsum.py (+7 −7)

@@ -28,7 +28,7 @@ def einsum(equation, *operands):
     Uses uncased letters to specify the dimension of the operands and result. The input
     equation is on the left hand before `->` while the output equation is on the right side.
     Einsum can infer the result shape so that the `->` and the result label letters can be omitted.
-    Operands in the input equation are splited by commas (','), e.g. 'abc,cde' describes two 3D
+    Operands in the input equation are splitted by commas (','), e.g. 'abc,cde' describes two 3D
     operands. The dimensions labeled with same letter should be same or be 1. Ellipsis ('...') can
     be used to specify the broadcast dimensions.
 
@@ -129,14 +129,14 @@ def _mul_sum(left, right, sum_dims):
         is_right_summed_dim = right.shape[i] > 1
         if i in sum_dims_set:
             if is_left_summed_dim and is_right_summed_dim:
-                assert left.shape[i] == right.shape[i], "Non-brocast dim should be equal."
+                assert left.shape[i] == right.shape[i], "Non-broadcast dim should be equal."
                 summed_size *= left.shape[i]
             elif is_left_summed_dim:
                 left = left.sum(axis=i, keepdim=True)
             elif is_right_summed_dim:
                 right = right.sum(axis=i, keepdim=True)
         elif is_left_summed_dim and is_right_summed_dim:
-            assert left.shape[i] == right.shape[i], "Non-brocast dim should be equal."
+            assert left.shape[i] == right.shape[i], "Non-broadcast dim should be equal."
             batch_dims.append(i)
             batch_size *= left.shape[i]
         elif is_left_summed_dim:
@@ -204,7 +204,7 @@ def _mul_sum(left, right, sum_dims):
         for ch in term:
             if ch == ".":
                 ell_char_count += 1
-                assert ell_char_count <= 3, "The '.' should only exist in one ellispis '...' in term {}".format(term)
+                assert ell_char_count <= 3, "The '.' should only exist in one ellipsis '...' in term {}".format(term)
                 if ell_char_count == 3:
                     if num_ell_idxes == -1:
                         num_ell_idxes = curr_num_ell_idxes
@@ -213,7 +213,7 @@ def _mul_sum(left, right, sum_dims):
                 else:
                     assert (
                         curr_num_ell_idxes == num_ell_idxes
-                    ), "Ellispis in all terms should represent same dimensions ({}).".format(num_ell_idxes)
+                    ), "Ellipsis in all terms should represent same dimensions ({}).".format(num_ell_idxes)
 
                 for j in range(num_ell_idxes):
                     curr_operand_idxes.append(j + first_ell_idx)
@@ -247,11 +247,11 @@ def _mul_sum(left, right, sum_dims):
     for ch in output_eqn:
         if ch == ".":
             ell_char_count += 1
-            assert ell_char_count <= 3, "The '.' should only exist in one ellispis '...' in term {}".format(
+            assert ell_char_count <= 3, "The '.' should only exist in one ellipsis '...' in term {}".format(
                 output_eqn
            )
             if ell_char_count == 3:
-                assert num_ell_idxes > -1, "Input equation '{}' don't have ellispis.".format(input_eqn)
+                assert num_ell_idxes > -1, "Input equation '{}' don't have ellipsis.".format(input_eqn)
                 for j in range(num_ell_idxes):
                     idxes_to_output_dims[first_ell_idx + j] = num_output_dims
                     num_output_dims += 1
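
As a reference for the docstring being fixed: operands are comma-separated on the left of `->`, and dimensions sharing a letter are contracted. A small usage sketch of this module's `einsum`:

    import paddle

    from paddlenlp.ops import einsum

    x = paddle.randn([2, 3, 4])
    y = paddle.randn([4, 5])
    # Contract over the shared 'k' axis; result shape is [2, 3, 5].
    z = einsum("ijk,kl->ijl", x, y)
    print(z.shape)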

paddlenlp/prompt/prompt_utils.py (+1 −1)

@@ -44,7 +44,7 @@ class PromptDataCollatorWithPadding:
     pad the inputs to the longest sequence in the batch.
 
     Args:
-        tokenizer (`paddlennlp.transformers.PretrainedTokenizer`):
+        tokenizer (`paddlenlp.transformers.PretrainedTokenizer`):
             The tokenizer used for encoding the data from PromptTokenizer.
     """
 
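For orientation, the corrected type in the docstring is the tokenizer the collator wraps. A construction sketch (the specific pretrained model name is an assumption):

    from paddlenlp.prompt import PromptDataCollatorWithPadding
    from paddlenlp.transformers import AutoTokenizer

    # Pads a batch of encoded prompt features to the longest sequence.
    tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-base-zh")
    collator = PromptDataCollatorWithPadding(tokenizer)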

paddlenlp/prompt/template.py (+3 −3)

@@ -507,7 +507,7 @@ def parse_soft_prompt(self):
         `Tuple[Dict[int, int], List[List[int]], int]`:
             - Mapping from continuous ids to word ids for initialization.
             - Continuous ids for each part. Id 0 denotes none-continuous part.
-            - Number of unique coutinuous tokens.
+            - Number of unique continuous tokens.
         """
         prompt = self._prompt.copy()
         num_soft_token = 1
@@ -560,15 +560,15 @@ def parse_soft_prompt(self):
                 else:
                     soft_id_reindex[part["soft_id"]] = soft_id_list
 
-            # Deal with continous prompt defined by `soft_id`.
+            # Deal with continuous prompt defined by `soft_id`.
             elif "soft_id" in part and part["soft_id"] in soft_id_reindex:
                 soft_id_list = soft_id_reindex[part["soft_id"]]
                 if "length" in part:
                     logger.warning("Ignore `length` because it is incompatible with existing `soft_id`.")
                 soft_token_ids.append(soft_id_list)
                 part_prompt = {"soft": [self.tokenizer.unk_token] * len(soft_id_list)}
 
-            # Deal with continous prompt with random initialization.
+            # Deal with continuous prompt with random initialization.
             else:
                 if "length" not in part:
                     part["length"] = 1

paddlenlp/seq2vec/encoder.py (+4 −4)

@@ -260,7 +260,7 @@ def forward(self, inputs, mask=None):
             Shape as `(batch_size, num_tokens, emb_dim)` and dtype as `float32` or `float64`.
             Tensor containing the features of the input sequence.
         mask (Tensor, optional):
-            Shape shoule be same as `inputs` and dtype as `int32`, `int64`, `float32` or `float64`.
+            Shape should be same as `inputs` and dtype as `int32`, `int64`, `float32` or `float64`.
             Its each elements identify whether the corresponding input token is padding or not.
             If True, not padding token. If False, padding token.
             Defaults to `None`.
@@ -319,7 +319,7 @@ class GRUEncoder(nn.Layer):
             Defaults to 1.
         direction (str, optional):
             The direction of the network. It can be "forward" and "bidirect"
-            (it means bidirection network). If "bidirect", it is a birectional GRU,
+            (it means bidirection network). If "bidirect", it is a bidirectional GRU,
             and returns the concat output from both directions.
             Defaults to "forward".
         dropout (float, optional):
@@ -504,7 +504,7 @@ class LSTMEncoder(nn.Layer):
             Defaults to 1.
         direction (str, optional):
             The direction of the network. It can be "forward" or "bidirect" (it means bidirection network).
-            If "bidirect", it is a birectional LSTM, and returns the concat output from both directions.
+            If "bidirect", it is a bidirectional LSTM, and returns the concat output from both directions.
             Defaults to "forward".
         dropout (float, optional):
             If non-zero, introduces a Dropout layer on the outputs of each LSTM layer
@@ -688,7 +688,7 @@ class RNNEncoder(nn.Layer):
             Defaults to 1.
         direction (str, optional):
             The direction of the network. It can be "forward" and "bidirect"
-            (it means bidirection network). If "biderect", it is a birectional RNN,
+            (it means bidirection network). If "bidirect", it is a bidirectional RNN,
             and returns the concat output from both directions. Defaults to "forward"
         dropout (float, optional):
             If non-zero, introduces a Dropout layer on the outputs of each RNN layer
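
As background for the "bidirect" fixes: a minimal sketch of a bidirectional encoder from `paddlenlp.seq2vec` (the sizes are placeholders):

    import paddle

    from paddlenlp.seq2vec import LSTMEncoder

    # Bidirectional LSTM; returns the concatenated last states of both directions.
    encoder = LSTMEncoder(input_size=128, hidden_size=64, direction="bidirect")
    inputs = paddle.randn([4, 10, 128])  # (batch_size, num_tokens, emb_dim)
    output = encoder(inputs)
    print(output.shape)  # [4, 128]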

paddlenlp/taskflow/dependency_parsing.py (+3 −3)

@@ -163,7 +163,7 @@ def __init__(
         else:
             raise ValueError(
                 "The encoding model should be one of \
-                    ddparser, ddparser-ernie-1.0 and ddoarser-ernie-gram-zh"
+                    ddparser, ddparser-ernie-1.0 and ddparser-ernie-gram-zh"
             )
         self._check_task_files()
         self._construct_vocabs()
@@ -528,9 +528,9 @@ def eisner(scores, mask):
     s_i = np.full_like(scores, float("-inf"))
     # Score for complete span
     s_c = np.full_like(scores, float("-inf"))
-    # Incompelte span position for backtrack
+    # Incomplete span position for backtrack
     p_i = np.zeros((seq_len, seq_len, batch_size), dtype=np.int64)
-    # Compelte span position for backtrack
+    # Complete span position for backtrack
     p_c = np.zeros((seq_len, seq_len, batch_size), dtype=np.int64)
     # Set 0 to s_c.diagonal
     s_c = fill_diagonal(s_c, 0)
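
The corrected error message enumerates the valid encoding models for this taskflow. A usage sketch (selecting the model via the `model` keyword is an assumption based on that message):

    from paddlenlp import Taskflow

    # One of: ddparser, ddparser-ernie-1.0, ddparser-ernie-gram-zh
    ddp = Taskflow("dependency_parsing", model="ddparser-ernie-1.0")
    print(ddp("百度是一家高科技公司"))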

paddlenlp/taskflow/models/dependency_parsing_model.py (+1 −1)

@@ -217,7 +217,7 @@ def index_sample(x, index):
     arr_index = paddle.arange(start=0, end=len(index), dtype=index.dtype)
     arr_index = paddle.unsqueeze(arr_index, axis=[1, 2])
     arr_index = paddle.expand(arr_index, index.shape)
-    # Genrate new index
+    # Generate new index
     new_index = paddle.concat((arr_index, index), -1)
     new_index = paddle.reshape(new_index, (-1, 2))
     # Get output

paddlenlp/taskflow/models/sentiment_analysis_model.py (+4 −4)

@@ -34,7 +34,7 @@ class BoWModel(nn.Layer):
         padding_idx(int, optional): The padding value in the embedding, the padding_idx of embedding value will
             not be updated, the default value is 0.
         hidden_size(int, optional): The output size of linear that after the bow, default value is 128.
-        fc_hidden_size(int, optional): The output size of linear that after the fisrt linear, default value is 96.
+        fc_hidden_size(int, optional): The output size of linear that after the first linear, default value is 96.
     """
 
     def __init__(self, vocab_size, num_classes, emb_dim=128, padding_idx=0, hidden_size=128, fc_hidden_size=96):
@@ -71,15 +71,15 @@ class LSTMModel(nn.Layer):
     which is passed through some feed-forward layers to output a logits (`output_layer`).
     Args:
         vocab_size(int): The vocab size that used to create the embedding.
-        num_class(int): The num clas of the classifier.
+        num_class(int): The num class of the classifier.
         emb_dim(int. optional): The size of the embedding, default value is 128.
         padding_idx(int, optional): The padding value in the embedding, the padding_idx of embedding value will
             not be updated, the default value is 0.
-        lstm_hidden_size(int, optional): The output size of the lstm, defalut value 198.
+        lstm_hidden_size(int, optional): The output size of the lstm, default value 198.
         direction(string, optional): The direction of lstm, default value is `forward`.
         lstm_layers(string, optional): The num of lstm layer.
         dropout(float, optional): The dropout rate of lstm.
-        pooling_type(float, optional): The pooling type of lstm. Defalut value is None,
+        pooling_type(float, optional): The pooling type of lstm. Default value is None,
             if `pooling_type` is None, then the LSTMEncoder will return the hidden state of the last time step at last layer as a single vector.
     """
 
