|
| 1 | +use crate::{EarlyContext, EarlyLintPass, LintContext}; |
| 2 | +use rustc_ast as ast; |
| 3 | +use rustc_errors::{Applicability, SuggestionStyle}; |
| 4 | +use rustc_span::{BytePos, Span, Symbol}; |
| 5 | + |
declare_lint! {
    /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
    /// visual representation of text on screen in a way that does not correspond to their
    /// in-memory representation.
    ///
    /// ### Explanation
    ///
    /// The Unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
    /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
    /// its direction on software that supports these codepoints. This makes the text "abc" display
    /// as "cba" on screen. By leveraging software that supports these, people can write specially
    /// crafted literals that make the surrounding code seem like it's performing one action, when
    /// in reality it is performing another. Because of this, we proactively lint against their
    /// presence to avoid surprises.
    ///
    /// ### Example
    ///
    /// ```rust,compile_fail
    /// #![deny(text_direction_codepoint_in_literal)]
    /// fn main() {
    ///     println!("{:?}", '‮');
    /// }
    /// ```
    ///
    /// {{produces}}
    ///
    pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
    Deny,
    "detect special Unicode codepoints that affect the visual representation of text on screen, \
     changing the direction in which text flows",
}
| 37 | + |
// Declares the `HiddenUnicodeCodepoints` lint pass type and associates it with the
// `TEXT_DIRECTION_CODEPOINT_IN_LITERAL` lint, the only lint this pass reports.
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
| 39 | + |
/// Unicode bidirectional-control codepoints that this lint looks for.
///
/// The slice can be used directly as a `str` pattern, e.g.
/// `text.contains(UNICODE_TEXT_FLOW_CHARS)`, which matches if *any* of the listed characters
/// occurs in `text`.
pub(crate) const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
    '\u{202A}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202B}', // RIGHT-TO-LEFT EMBEDDING
    '\u{202D}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202E}', // RIGHT-TO-LEFT OVERRIDE
    '\u{2066}', // LEFT-TO-RIGHT ISOLATE
    '\u{2067}', // RIGHT-TO-LEFT ISOLATE
    '\u{2068}', // FIRST STRONG ISOLATE
    '\u{202C}', // POP DIRECTIONAL FORMATTING
    '\u{2069}', // POP DIRECTIONAL ISOLATE
];
| 44 | + |
| 45 | +impl HiddenUnicodeCodepoints { |
| 46 | + fn lint_text_direction_codepoint( |
| 47 | + &self, |
| 48 | + cx: &EarlyContext<'_>, |
| 49 | + text: Symbol, |
| 50 | + span: Span, |
| 51 | + padding: u32, |
| 52 | + point_at_inner_spans: bool, |
| 53 | + label: &str, |
| 54 | + ) { |
| 55 | + // Obtain the `Span`s for each of the forbidden chars. |
| 56 | + let spans: Vec<_> = text |
| 57 | + .as_str() |
| 58 | + .char_indices() |
| 59 | + .filter_map(|(i, c)| { |
| 60 | + UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| { |
| 61 | + let lo = span.lo() + BytePos(i as u32 + padding); |
| 62 | + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) |
| 63 | + }) |
| 64 | + }) |
| 65 | + .collect(); |
| 66 | + |
| 67 | + cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| { |
| 68 | + let mut err = lint.build(&format!( |
| 69 | + "unicode codepoint changing visible direction of text present in {}", |
| 70 | + label |
| 71 | + )); |
| 72 | + let (an, s) = match spans.len() { |
| 73 | + 1 => ("an ", ""), |
| 74 | + _ => ("", "s"), |
| 75 | + }; |
| 76 | + err.span_label( |
| 77 | + span, |
| 78 | + &format!( |
| 79 | + "this {} contains {}invisible unicode text flow control codepoint{}", |
| 80 | + label, an, s, |
| 81 | + ), |
| 82 | + ); |
| 83 | + if point_at_inner_spans { |
| 84 | + for (c, span) in &spans { |
| 85 | + err.span_label(*span, format!("{:?}", c)); |
| 86 | + } |
| 87 | + } |
| 88 | + err.note( |
| 89 | + "these kind of unicode codepoints change the way text flows on applications that \ |
| 90 | + support them, but can cause confusion because they change the order of \ |
| 91 | + characters on the screen", |
| 92 | + ); |
| 93 | + if point_at_inner_spans && !spans.is_empty() { |
| 94 | + err.multipart_suggestion_with_style( |
| 95 | + "if their presence wasn't intentional, you can remove them", |
| 96 | + spans.iter().map(|(_, span)| (*span, "".to_string())).collect(), |
| 97 | + Applicability::MachineApplicable, |
| 98 | + SuggestionStyle::HideCodeAlways, |
| 99 | + ); |
| 100 | + err.multipart_suggestion( |
| 101 | + "if you want to keep them but make them visible in your source code, you can \ |
| 102 | + escape them", |
| 103 | + spans |
| 104 | + .into_iter() |
| 105 | + .map(|(c, span)| { |
| 106 | + let c = format!("{:?}", c); |
| 107 | + (span, c[1..c.len() - 1].to_string()) |
| 108 | + }) |
| 109 | + .collect(), |
| 110 | + Applicability::MachineApplicable, |
| 111 | + ); |
| 112 | + } else { |
| 113 | + // FIXME: in other suggestions we've reversed the inner spans of doc comments. We |
| 114 | + // should do the same here to provide the same good suggestions as we do for |
| 115 | + // literals above. |
| 116 | + err.note("if their presence wasn't intentional, you can remove them"); |
| 117 | + err.note(&format!( |
| 118 | + "if you want to keep them but make them visible in your source code, you can \ |
| 119 | + escape them: {}", |
| 120 | + spans |
| 121 | + .into_iter() |
| 122 | + .map(|(c, _)| { format!("{:?}", c) }) |
| 123 | + .collect::<Vec<String>>() |
| 124 | + .join(", "), |
| 125 | + )); |
| 126 | + } |
| 127 | + err.emit(); |
| 128 | + }); |
| 129 | + } |
| 130 | +} |
| 131 | +impl EarlyLintPass for HiddenUnicodeCodepoints { |
| 132 | + fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { |
| 133 | + if let ast::AttrKind::DocComment(_, comment) = attr.kind { |
| 134 | + if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) { |
| 135 | + self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); |
| 136 | + } |
| 137 | + } |
| 138 | + } |
| 139 | + |
| 140 | + fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { |
| 141 | + // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` |
| 142 | + let (text, span, padding) = match &expr.kind { |
| 143 | + ast::ExprKind::Lit(ast::Lit { token, kind, span }) => { |
| 144 | + let text = token.symbol; |
| 145 | + if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) { |
| 146 | + return; |
| 147 | + } |
| 148 | + let padding = match kind { |
| 149 | + // account for `"` or `'` |
| 150 | + ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1, |
| 151 | + // account for `r###"` |
| 152 | + ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2, |
| 153 | + _ => return, |
| 154 | + }; |
| 155 | + (text, span, padding) |
| 156 | + } |
| 157 | + _ => return, |
| 158 | + }; |
| 159 | + self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal"); |
| 160 | + } |
| 161 | +} |
0 commit comments