@@ -24,21 +24,35 @@ declare_lint! {
24
24
crate_level_only
25
25
}
26
26
27
- declare_lint_pass ! ( NonAsciiIdents => [ NON_ASCII_IDENTS , UNCOMMON_CODEPOINTS , CONFUSABLE_IDENTS ] ) ;
27
+ declare_lint ! {
28
+ pub MIXED_SCRIPT_CONFUSABLES ,
29
+ Warn ,
30
+ "detects Unicode scripts whose mixed script confusables codepoints are solely used" ,
31
+ crate_level_only
32
+ }
33
+
34
+ declare_lint_pass ! ( NonAsciiIdents => [ NON_ASCII_IDENTS , UNCOMMON_CODEPOINTS , CONFUSABLE_IDENTS , MIXED_SCRIPT_CONFUSABLES ] ) ;
28
35
29
36
impl EarlyLintPass for NonAsciiIdents {
30
37
fn check_crate ( & mut self , cx : & EarlyContext < ' _ > , _: & ast:: Crate ) {
31
38
use rustc_session:: lint:: Level ;
32
39
use rustc_span:: Span ;
40
+ use std:: collections:: BTreeMap ;
33
41
use unicode_security:: GeneralSecurityProfile ;
34
42
use utils:: CowBoxSymStr ;
35
43
36
44
let check_non_ascii_idents = cx. builder . lint_level ( NON_ASCII_IDENTS ) . 0 != Level :: Allow ;
37
45
let check_uncommon_codepoints =
38
46
cx. builder . lint_level ( UNCOMMON_CODEPOINTS ) . 0 != Level :: Allow ;
39
47
let check_confusable_idents = cx. builder . lint_level ( CONFUSABLE_IDENTS ) . 0 != Level :: Allow ;
48
+ let check_mixed_script_confusables =
49
+ cx. builder . lint_level ( MIXED_SCRIPT_CONFUSABLES ) . 0 != Level :: Allow ;
40
50
41
- if !check_non_ascii_idents && !check_uncommon_codepoints && !check_confusable_idents {
51
+ if !check_non_ascii_idents
52
+ && !check_uncommon_codepoints
53
+ && !check_confusable_idents
54
+ && !check_mixed_script_confusables
55
+ {
42
56
return ;
43
57
}
44
58
@@ -107,6 +121,115 @@ impl EarlyLintPass for NonAsciiIdents {
107
121
. or_insert ( ( symbol_str, sp, is_ascii) ) ;
108
122
}
109
123
}
124
+
125
+ if has_non_ascii_idents && check_mixed_script_confusables {
126
+ use unicode_security:: is_potential_mixed_script_confusable_char;
127
+ use unicode_security:: mixed_script:: AugmentedScriptSet ;
128
+
129
+ #[ derive( Clone ) ]
130
+ enum ScriptSetUsage {
131
+ Suspicious ( Vec < char > , Span ) ,
132
+ Verified ,
133
+ }
134
+
135
+ let mut script_states: FxHashMap < AugmentedScriptSet , ScriptSetUsage > =
136
+ FxHashMap :: default ( ) ;
137
+ let latin_augmented_script_set = AugmentedScriptSet :: for_char ( 'A' ) ;
138
+ script_states. insert ( latin_augmented_script_set, ScriptSetUsage :: Verified ) ;
139
+
140
+ let mut has_suspicous = false ;
141
+ for ( symbol, & sp) in symbols. iter ( ) {
142
+ let symbol_str = symbol. as_str ( ) ;
143
+ for ch in symbol_str. chars ( ) {
144
+ if ch. is_ascii ( ) {
145
+ // all ascii characters are covered by exception.
146
+ continue ;
147
+ }
148
+ if !GeneralSecurityProfile :: identifier_allowed ( ch) {
149
+ // this character is covered by `uncommon_codepoints` lint.
150
+ continue ;
151
+ }
152
+ let augmented_script_set = AugmentedScriptSet :: for_char ( ch) ;
153
+ script_states
154
+ . entry ( augmented_script_set)
155
+ . and_modify ( |existing_state| {
156
+ if let ScriptSetUsage :: Suspicious ( ch_list, _) = existing_state {
157
+ if is_potential_mixed_script_confusable_char ( ch) {
158
+ ch_list. push ( ch) ;
159
+ } else {
160
+ * existing_state = ScriptSetUsage :: Verified ;
161
+ }
162
+ }
163
+ } )
164
+ . or_insert_with ( || {
165
+ if !is_potential_mixed_script_confusable_char ( ch) {
166
+ ScriptSetUsage :: Verified
167
+ } else {
168
+ has_suspicous = true ;
169
+ ScriptSetUsage :: Suspicious ( vec ! [ ch] , sp)
170
+ }
171
+ } ) ;
172
+ }
173
+ }
174
+
175
+ if has_suspicous {
176
+ let verified_augmented_script_sets = script_states
177
+ . iter ( )
178
+ . flat_map ( |( k, v) | match v {
179
+ ScriptSetUsage :: Verified => Some ( * k) ,
180
+ _ => None ,
181
+ } )
182
+ . collect :: < Vec < _ > > ( ) ;
183
+
184
+ // we're sorting the output here.
185
+ let mut lint_reports: BTreeMap < ( Span , Vec < char > ) , AugmentedScriptSet > =
186
+ BTreeMap :: new ( ) ;
187
+
188
+ ' outerloop: for ( augment_script_set, usage) in script_states {
189
+ let ( mut ch_list, sp) = match usage {
190
+ ScriptSetUsage :: Verified => continue ,
191
+ ScriptSetUsage :: Suspicious ( ch_list, sp) => ( ch_list, sp) ,
192
+ } ;
193
+
194
+ if augment_script_set. is_all ( ) {
195
+ continue ;
196
+ }
197
+
198
+ for existing in verified_augmented_script_sets. iter ( ) {
199
+ if existing. is_all ( ) {
200
+ continue ;
201
+ }
202
+ let mut intersect = * existing;
203
+ intersect. intersect_with ( augment_script_set) ;
204
+ if !intersect. is_empty ( ) && !intersect. is_all ( ) {
205
+ continue ' outerloop;
206
+ }
207
+ }
208
+
209
+ ch_list. sort ( ) ;
210
+ ch_list. dedup ( ) ;
211
+ lint_reports. insert ( ( sp, ch_list) , augment_script_set) ;
212
+ }
213
+
214
+ for ( ( sp, ch_list) , script_set) in lint_reports {
215
+ cx. struct_span_lint ( MIXED_SCRIPT_CONFUSABLES , sp, |lint| {
216
+ let message = format ! (
217
+ "The usage of Script Group `{}` in this crate consists solely of mixed script confusables" ,
218
+ script_set) ;
219
+ let mut note = "The usage includes " . to_string ( ) ;
220
+ for ( idx, ch) in ch_list. into_iter ( ) . enumerate ( ) {
221
+ if idx != 0 {
222
+ note += ", " ;
223
+ }
224
+ let char_info = format ! ( "'{}' (U+{:04X})" , ch, ch as u32 ) ;
225
+ note += & char_info;
226
+ }
227
+ note += "." ;
228
+ lint. build ( & message) . note ( & note) . note ( "Please recheck to make sure their usages are indeed what you want." ) . emit ( )
229
+ } ) ;
230
+ }
231
+ }
232
+ }
110
233
}
111
234
}
112
235
0 commit comments