Skip to content

Commit db14a17

Browse files
committed
Auto merge of #90462 - pietroalbini:bidi-master, r=nikomatsakis,pietroalbini
[master] Fix CVE-2021-42574

This PR implements new lints to mitigate the impact of [CVE-2021-42574], caused by the presence of bidirectional-override Unicode codepoints in the compiled source code. [See the advisory][advisory] for more information about the vulnerability.

The changes in this PR will be released in tomorrow's nightly release.

[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
[advisory]: https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
2 parents ff0e148 + cdd3b86 commit db14a17

File tree

13 files changed

+543
-10
lines changed

13 files changed

+543
-10
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4259,6 +4259,7 @@ dependencies = [
42594259
"rustc_span",
42604260
"tracing",
42614261
"unicode-normalization",
4262+
"unicode-width",
42624263
]
42634264

42644265
[[package]]

RELEASES.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
Version 1.56.1 (2021-11-01)
2+
===========================
3+
4+
- New lints to detect the presence of bidirectional-override Unicode
5+
codepoints in the compiled source code ([CVE-2021-42574])
6+
7+
[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
8+
19
Version 1.56.0 (2021-10-21)
210
========================
311

compiler/rustc_errors/src/emitter.rs

+19-1
Original file line numberDiff line numberDiff line change
@@ -2063,8 +2063,26 @@ fn num_decimal_digits(num: usize) -> usize {
20632063
MAX_DIGITS
20642064
}
20652065

2066+
/// Characters that are rewritten before source text is rendered, each paired with the string
/// that takes its place.
///
/// We replace these characters so the CLI output is always consistent and underlines aligned.
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
    ('\t', " "), // We do our own tab replacement
    // The following unicode text flow control characters are inconsistently supported across
    // CLIs and can cause confusion due to the bytes on disk not corresponding to the visible
    // source code, so we replace them always.
    ('\u{202A}', ""),
    ('\u{202B}', ""),
    ('\u{202D}', ""),
    ('\u{202E}', ""),
    ('\u{2066}', ""),
    ('\u{2067}', ""),
    ('\u{2068}', ""),
    ('\u{202C}', ""),
    ('\u{2069}', ""),
];

/// Returns a copy of `str` in which every character listed in `OUTPUT_REPLACEMENTS` has been
/// swapped for its replacement string; all other characters are copied through unchanged.
///
/// Despite the name, this handles more than tabs: the text flow control characters in the table
/// are dropped as well.
fn replace_tabs(str: &str) -> String {
    // One pass over the input and one output buffer, instead of one full `str::replace` (and one
    // fresh `String`) per table entry. The result is the same as applying the replacements one
    // after another because no replacement string contains a character that is itself listed.
    let mut out = String::with_capacity(str.len());
    for ch in str.chars() {
        match OUTPUT_REPLACEMENTS.iter().find(|(from, _)| *from == ch) {
            Some((_, to)) => out.push_str(to),
            None => out.push(ch),
        }
    }
    out
}
20692087

20702088
fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {

compiler/rustc_lint/src/context.rs

+38-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
1717
use self::TargetLint::*;
1818

19+
use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS;
1920
use crate::levels::{is_known_lint_tool, LintLevelsBuilder};
2021
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
2122
use rustc_ast as ast;
@@ -39,7 +40,7 @@ use rustc_session::lint::{BuiltinLintDiagnostics, ExternDepSpec};
3940
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
4041
use rustc_session::Session;
4142
use rustc_span::lev_distance::find_best_match_for_name;
42-
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
43+
use rustc_span::{symbol::Symbol, BytePos, MultiSpan, Span, DUMMY_SP};
4344
use rustc_target::abi;
4445
use tracing::debug;
4546

@@ -597,6 +598,42 @@ pub trait LintContext: Sized {
597598
// Now, set up surrounding context.
598599
let sess = self.sess();
599600
match diagnostic {
601+
BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => {
602+
let spans: Vec<_> = content
603+
.char_indices()
604+
.filter_map(|(i, c)| {
605+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
606+
let lo = span.lo() + BytePos(2 + i as u32);
607+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
608+
})
609+
})
610+
.collect();
611+
let (an, s) = match spans.len() {
612+
1 => ("an ", ""),
613+
_ => ("", "s"),
614+
};
615+
db.span_label(span, &format!(
616+
"this comment contains {}invisible unicode text flow control codepoint{}",
617+
an,
618+
s,
619+
));
620+
for (c, span) in &spans {
621+
db.span_label(*span, format!("{:?}", c));
622+
}
623+
db.note(
624+
"these kind of unicode codepoints change the way text flows on \
625+
applications that support them, but can cause confusion because they \
626+
change the order of characters on the screen",
627+
);
628+
if !spans.is_empty() {
629+
db.multipart_suggestion_with_style(
630+
"if their presence wasn't intentional, you can remove them",
631+
spans.into_iter().map(|(_, span)| (span, "".to_string())).collect(),
632+
Applicability::MachineApplicable,
633+
SuggestionStyle::HideCodeAlways,
634+
);
635+
}
636+
},
600637
BuiltinLintDiagnostics::Normal => (),
601638
BuiltinLintDiagnostics::BareTraitObject(span, is_global) => {
602639
let (sugg, app) = match sess.source_map().span_to_snippet(span) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
use crate::{EarlyContext, EarlyLintPass, LintContext};
2+
use rustc_ast as ast;
3+
use rustc_errors::{Applicability, SuggestionStyle};
4+
use rustc_span::{BytePos, Span, Symbol};
5+
6+
declare_lint! {
7+
/// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
8+
/// visual representation of text on screen in a way that does not correspond to their
9+
/// in-memory representation.
10+
///
11+
/// ### Explanation
12+
///
13+
/// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
14+
/// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
15+
/// its direction on software that supports these codepoints. This makes the text "abc" display
16+
/// as "cba" on screen. By leveraging software that supports these, people can write specially
17+
/// crafted literals that make the surrounding code seem like it's performing one action, when
18+
/// in reality it is performing another. Because of this, we proactively lint against their
19+
/// presence to avoid surprises.
20+
///
21+
/// ### Example
22+
///
23+
/// ```rust,compile_fail
24+
/// #![deny(text_direction_codepoint_in_literal)]
25+
/// fn main() {
26+
/// println!("{:?}", '‮');
27+
/// }
28+
/// ```
29+
///
30+
/// {{produces}}
31+
///
32+
pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
33+
Deny,
34+
"detect special Unicode codepoints that affect the visual representation of text on screen, \
35+
changing the direction in which text flows",
36+
}
37+
38+
// Declares the `HiddenUnicodeCodepoints` lint pass and lists `TEXT_DIRECTION_CODEPOINT_IN_LITERAL`
// as the lint it is associated with.
declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]);
39+
40+
/// The Unicode text flow control codepoints that the checks in this file look for.
///
/// This is the same set of characters that the `text_direction_codepoint_in_literal` lint
/// documentation lists as changing the direction in which text flows on screen.
crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[
41+
'\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}',
42+
'\u{2069}',
43+
];
44+
45+
impl HiddenUnicodeCodepoints {
46+
/// Reports `TEXT_DIRECTION_CODEPOINT_IN_LITERAL` on `span` for the text flow control codepoints
/// found in `text`.
///
/// * `text` - the text that is scanned for `UNICODE_TEXT_FLOW_CHARS`.
/// * `span` - the span the lint is attached to; per-codepoint spans are computed relative to
///   its start.
/// * `padding` - number of bytes added to each codepoint's byte index within `text` when
///   computing where it starts inside `span`.
/// * `point_at_inner_spans` - when `true`, every codepoint gets its own label and the removal
///   and escaping fixes are offered as suggestions; otherwise the same advice is emitted as
///   plain notes.
/// * `label` - what kind of item is being linted; interpolated into the messages.
fn lint_text_direction_codepoint(
47+
&self,
48+
cx: &EarlyContext<'_>,
49+
text: Symbol,
50+
span: Span,
51+
padding: u32,
52+
point_at_inner_spans: bool,
53+
label: &str,
54+
) {
55+
// Obtain the `Span`s for each of the forbidden chars: every matching char is paired with a
// sub-span that starts `padding` plus the char's byte index after `span.lo()` and covers the
// char's UTF-8 length.
56+
let spans: Vec<_> = text
57+
.as_str()
58+
.char_indices()
59+
.filter_map(|(i, c)| {
60+
UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| {
61+
let lo = span.lo() + BytePos(i as u32 + padding);
62+
(c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32)))
63+
})
64+
})
65+
.collect();
66+
67+
cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| {
68+
let mut err = lint.build(&format!(
69+
"unicode codepoint changing visible direction of text present in {}",
70+
label
71+
));
72+
// Pick the article and plural suffix so the label reads naturally for one codepoint versus
// any other count.
let (an, s) = match spans.len() {
73+
1 => ("an ", ""),
74+
_ => ("", "s"),
75+
};
76+
err.span_label(
77+
span,
78+
&format!(
79+
"this {} contains {}invisible unicode text flow control codepoint{}",
80+
label, an, s,
81+
),
82+
);
83+
// Label each codepoint individually with its `{:?}` rendering.
if point_at_inner_spans {
84+
for (c, span) in &spans {
85+
err.span_label(*span, format!("{:?}", c));
86+
}
87+
}
88+
err.note(
89+
"these kind of unicode codepoints change the way text flows on applications that \
90+
support them, but can cause confusion because they change the order of \
91+
characters on the screen",
92+
);
93+
// With per-codepoint spans available, offer two fixes: delete the codepoints, or replace each
// one with its escaped form.
if point_at_inner_spans && !spans.is_empty() {
94+
err.multipart_suggestion_with_style(
95+
"if their presence wasn't intentional, you can remove them",
96+
spans.iter().map(|(_, span)| (*span, "".to_string())).collect(),
97+
Applicability::MachineApplicable,
98+
SuggestionStyle::HideCodeAlways,
99+
);
100+
err.multipart_suggestion(
101+
"if you want to keep them but make them visible in your source code, you can \
102+
escape them",
103+
spans
104+
.into_iter()
105+
.map(|(c, span)| {
106+
// The `{:?}` rendering of a `char` is wrapped in quotes; dropping the first and last byte
// leaves just the escape sequence.
let c = format!("{:?}", c);
107+
(span, c[1..c.len() - 1].to_string())
108+
})
109+
.collect(),
110+
Applicability::MachineApplicable,
111+
);
112+
} else {
113+
// FIXME: in other suggestions we've reversed the inner spans of doc comments. We
114+
// should do the same here to provide the same good suggestions as we do for
115+
// literals above.
116+
err.note("if their presence wasn't intentional, you can remove them");
117+
err.note(&format!(
118+
"if you want to keep them but make them visible in your source code, you can \
119+
escape them: {}",
120+
spans
121+
.into_iter()
122+
.map(|(c, _)| { format!("{:?}", c) })
123+
.collect::<Vec<String>>()
124+
.join(", "),
125+
));
126+
}
127+
err.emit();
128+
});
129+
}
130+
}
131+
impl EarlyLintPass for HiddenUnicodeCodepoints {
132+
/// Lints doc comment attributes whose text contains any of `UNICODE_TEXT_FLOW_CHARS`.
///
/// The report uses a padding of `0` and does not point at the individual codepoints.
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
133+
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
134+
if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
135+
self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment");
136+
}
137+
}
138+
}
139+
140+
/// Lints cooked/raw string literals and char literals whose token text contains any of
/// `UNICODE_TEXT_FLOW_CHARS`; every other expression or literal kind returns without linting.
fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) {
141+
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
142+
let (text, span, padding) = match &expr.kind {
143+
ast::ExprKind::Lit(ast::Lit { token, kind, span }) => {
144+
let text = token.symbol;
145+
// Nothing to report when the literal's text has none of the codepoints.
if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) {
146+
return;
147+
}
148+
// `padding` is the length of the literal's opening delimiter, i.e. how far into `span` the
// text starts.
let padding = match kind {
149+
// account for `"` or `'`
150+
ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1,
151+
// account for `r###"`: the `Raw` payload (presumably the number of `#`s) plus 2 for the
// `r` and the `"`
152+
ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2,
153+
_ => return,
154+
};
155+
(text, span, padding)
156+
}
157+
_ => return,
158+
};
159+
self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal");
160+
}
161+
}

compiler/rustc_lint/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ pub mod builtin;
4848
mod context;
4949
mod early;
5050
mod enum_intrinsics_non_enums;
51+
pub mod hidden_unicode_codepoints;
5152
mod internal;
5253
mod late;
5354
mod levels;
@@ -78,6 +79,7 @@ use rustc_span::Span;
7879
use array_into_iter::ArrayIntoIter;
7980
use builtin::*;
8081
use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
82+
use hidden_unicode_codepoints::*;
8183
use internal::*;
8284
use methods::*;
8385
use non_ascii_idents::*;
@@ -129,6 +131,7 @@ macro_rules! early_lint_passes {
129131
DeprecatedAttr: DeprecatedAttr::new(),
130132
WhileTrue: WhileTrue,
131133
NonAsciiIdents: NonAsciiIdents,
134+
HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
132135
IncompleteFeatures: IncompleteFeatures,
133136
RedundantSemicolons: RedundantSemicolons,
134137
UnusedDocComment: UnusedDocComment,

compiler/rustc_lint_defs/src/builtin.rs

+28
Original file line numberDiff line numberDiff line change
@@ -3518,6 +3518,34 @@ declare_lint! {
35183518
@feature_gate = sym::non_exhaustive_omitted_patterns_lint;
35193519
}
35203520

3521+
declare_lint! {
3522+
/// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
3523+
/// change the visual representation of text on screen in a way that does not correspond to
3524+
/// their in-memory representation.
3525+
///
3526+
/// ### Example
3527+
///
3528+
/// ```rust,compile_fail
3529+
/// #![deny(text_direction_codepoint_in_comment)]
3530+
/// fn main() {
3531+
/// println!("{:?}"); // '‮');
3532+
/// }
3533+
/// ```
3534+
///
3535+
/// {{produces}}
3536+
///
3537+
/// ### Explanation
3538+
///
3539+
/// Unicode allows changing the visual flow of text on screen in order to support scripts that
3540+
/// are written right-to-left, but a specially crafted comment can make code that will be
3541+
/// compiled appear to be part of a comment, depending on the software used to read the code.
3542+
/// To avoid potential problems or confusion, such as in CVE-2021-42574, by default we deny
3543+
/// their use.
3544+
pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
3545+
Deny,
3546+
"invisible directionality-changing codepoints in comment"
3547+
}
3548+
35213549
declare_lint! {
35223550
/// The `deref_into_dyn_supertrait` lint is output whenever there is a use of the
35233551
/// `Deref` implementation with a `dyn SuperTrait` type as `Output`.

compiler/rustc_lint_defs/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ pub enum BuiltinLintDiagnostics {
306306
TrailingMacro(bool, Ident),
307307
BreakWithLabelAndLoop(Span),
308308
NamedAsmLabel(String),
309+
UnicodeTextFlow(Span, String),
309310
}
310311

311312
/// Lints that are buffered up early on in the `Session` before the

compiler/rustc_parse/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" }
1818
rustc_span = { path = "../rustc_span" }
1919
rustc_ast = { path = "../rustc_ast" }
2020
unicode-normalization = "0.1.11"
21+
unicode-width = "0.1.4"

0 commit comments

Comments
 (0)