Skip to content

Commit 886cef1

Browse files
Rollup merge of rust-lang#148321 - Marcondiro:master, r=Mark-Simulacrum
parser/lexer: bump to Unicode 17, use faster unicode-ident

Hello,

This bumps the Unicode version used by the lexer/parser to 17.0.0 by updating:
- `unicode-normalization` to 0.1.25
- `unicode-properties` to 0.1.4
- `unicode-width` to 0.2.2

and by replacing `unicode-xid` with `unicode-ident`, which is also 6 times faster.

I think it might be worth running the benchmarks to double-check. (`unicode-ident` is already in `src/tools/tidy/src/deps.rs`.)

Thanks!
2 parents 21cf7fb + f7cb82e commit 886cef1

File tree

10 files changed

+74
-22
lines changed

10 files changed

+74
-22
lines changed

Cargo.lock

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4144,8 +4144,8 @@ version = "0.0.0"
41444144
dependencies = [
41454145
"expect-test",
41464146
"memchr",
4147+
"unicode-ident",
41474148
"unicode-properties",
4148-
"unicode-xid",
41494149
]
41504150

41514151
[[package]]
@@ -5981,24 +5981,24 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
59815981

59825982
[[package]]
59835983
name = "unicode-ident"
5984-
version = "1.0.18"
5984+
version = "1.0.22"
59855985
source = "registry+https://github.com/rust-lang/crates.io-index"
5986-
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
5986+
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
59875987

59885988
[[package]]
59895989
name = "unicode-normalization"
5990-
version = "0.1.24"
5990+
version = "0.1.25"
59915991
source = "registry+https://github.com/rust-lang/crates.io-index"
5992-
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
5992+
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
59935993
dependencies = [
59945994
"tinyvec",
59955995
]
59965996

59975997
[[package]]
59985998
name = "unicode-properties"
5999-
version = "0.1.3"
5999+
version = "0.1.4"
60006000
source = "registry+https://github.com/rust-lang/crates.io-index"
6001-
checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0"
6001+
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
60026002

60036003
[[package]]
60046004
name = "unicode-script"

compiler/rustc_lexer/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ Rust lexer used by rustc. No stability guarantees are provided.
1515
# Note that this crate purposefully does not depend on other rustc crates
1616
[dependencies]
1717
memchr = "2.7.6"
18-
unicode-properties = { version = "0.1.0", default-features = false, features = ["emoji"] }
19-
unicode-xid = "0.2.0"
18+
unicode-properties = { version = "0.1.4", default-features = false, features = ["emoji"] }
19+
unicode-ident = "1.0.22"
2020

2121
[dev-dependencies]
2222
expect-test = "1.4.0"

compiler/rustc_lexer/src/lib.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,25 @@ use LiteralKind::*;
3434
use TokenKind::*;
3535
use cursor::EOF_CHAR;
3636
pub use cursor::{Cursor, FrontmatterAllowed};
37+
pub use unicode_ident::UNICODE_VERSION;
3738
use unicode_properties::UnicodeEmoji;
38-
pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
39+
40+
// Make sure that the Unicode version of the dependencies is the same.
41+
const _: () = {
42+
let properties = unicode_properties::UNICODE_VERSION;
43+
let ident = unicode_ident::UNICODE_VERSION;
44+
45+
if properties.0 != ident.0 as u64
46+
|| properties.1 != ident.1 as u64
47+
|| properties.2 != ident.2 as u64
48+
{
49+
panic!(
50+
"unicode-properties and unicode-ident must use the same Unicode version, \
51+
`unicode_properties::UNICODE_VERSION` and `unicode_ident::UNICODE_VERSION` are \
52+
different."
53+
);
54+
}
55+
};
3956

4057
/// Parsed token.
4158
/// It doesn't contain information about data that has been parsed,
@@ -370,14 +387,14 @@ pub fn is_horizontal_whitespace(c: char) -> bool {
370387
/// a formal definition of valid identifier name.
371388
pub fn is_id_start(c: char) -> bool {
372389
// This is XID_Start OR '_' (which formally is not a XID_Start).
373-
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
390+
c == '_' || unicode_ident::is_xid_start(c)
374391
}
375392

376393
/// True if `c` is valid as a non-first character of an identifier.
377394
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
378395
/// a formal definition of valid identifier name.
379396
pub fn is_id_continue(c: char) -> bool {
380-
unicode_xid::UnicodeXID::is_xid_continue(c)
397+
unicode_ident::is_xid_continue(c)
381398
}
382399

383400
/// The passed string is lexically an identifier.

compiler/rustc_parse/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ rustc_session = { path = "../rustc_session" }
2020
rustc_span = { path = "../rustc_span" }
2121
thin-vec = "0.2.12"
2222
tracing = "0.1"
23-
unicode-normalization = "0.1.11"
24-
unicode-width = "0.2.0"
23+
unicode-normalization = "0.1.25"
24+
unicode-width = "0.2.2"
2525
# tidy-alphabetical-end
2626

2727
[dev-dependencies]

compiler/rustc_parse/src/lib.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ use rustc_ast::token;
2222
use rustc_ast::tokenstream::TokenStream;
2323
use rustc_ast_pretty::pprust;
2424
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
25+
pub use rustc_lexer::UNICODE_VERSION;
2526
use rustc_session::parse::ParseSess;
2627
use rustc_span::source_map::SourceMap;
2728
use rustc_span::{FileName, SourceFile, Span};
28-
pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;
2929

3030
pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
3131

@@ -39,6 +39,44 @@ pub mod lexer;
3939

4040
mod errors;
4141

42+
// Make sure that the Unicode version of the dependencies is the same.
43+
const _: () = {
44+
let rustc_lexer = rustc_lexer::UNICODE_VERSION;
45+
let rustc_span = rustc_span::UNICODE_VERSION;
46+
let normalization = unicode_normalization::UNICODE_VERSION;
47+
let width = unicode_width::UNICODE_VERSION;
48+
49+
if rustc_lexer.0 != rustc_span.0
50+
|| rustc_lexer.1 != rustc_span.1
51+
|| rustc_lexer.2 != rustc_span.2
52+
{
53+
panic!(
54+
"rustc_lexer and rustc_span must use the same Unicode version, \
55+
`rustc_lexer::UNICODE_VERSION` and `rustc_span::UNICODE_VERSION` are \
56+
different."
57+
);
58+
}
59+
60+
if rustc_lexer.0 != normalization.0
61+
|| rustc_lexer.1 != normalization.1
62+
|| rustc_lexer.2 != normalization.2
63+
{
64+
panic!(
65+
"rustc_lexer and unicode-normalization must use the same Unicode version, \
66+
`rustc_lexer::UNICODE_VERSION` and `unicode_normalization::UNICODE_VERSION` are \
67+
different."
68+
);
69+
}
70+
71+
if rustc_lexer.0 != width.0 || rustc_lexer.1 != width.1 || rustc_lexer.2 != width.2 {
72+
panic!(
73+
"rustc_lexer and unicode-width must use the same Unicode version, \
74+
`rustc_lexer::UNICODE_VERSION` and `unicode_width::UNICODE_VERSION` are \
75+
different."
76+
);
77+
}
78+
};
79+
4280
rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
4381

4482
// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.

compiler/rustc_span/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,5 @@ scoped-tls = "1.0"
2121
sha1 = "0.10.0"
2222
sha2 = "0.10.1"
2323
tracing = "0.1"
24-
unicode-width = "0.2.0"
24+
unicode-width = "0.2.2"
2525
# tidy-alphabetical-end

compiler/rustc_span/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use rustc_macros::{Decodable, Encodable, HashStable_Generic};
3939
use rustc_serialize::opaque::{FileEncoder, MemDecoder};
4040
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
4141
use tracing::debug;
42+
pub use unicode_width::UNICODE_VERSION;
4243

4344
mod caching_source_map_view;
4445
pub mod source_map;

src/tools/tidy/src/deps.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
466466
"unicode-script",
467467
"unicode-security",
468468
"unicode-width",
469-
"unicode-xid",
470469
"utf8parse",
471470
"valuable",
472471
"version_check",

tests/ui-fulldeps/lexer/unicode-version.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#![feature(rustc_private)]
1313

1414
extern crate rustc_driver;
15-
extern crate rustc_lexer;
1615
extern crate rustc_parse;
1716

1817
fn main() {
@@ -22,6 +21,5 @@ fn main() {
2221
it should also be updated in the reference at \
2322
https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md."
2423
);
25-
println!("Unicode XID version is: {:?}", rustc_lexer::UNICODE_XID_VERSION);
26-
println!("Unicode normalization version is: {:?}", rustc_parse::UNICODE_NORMALIZATION_VERSION);
24+
println!("Unicode version used in rustc_parse is: {:?}", rustc_parse::UNICODE_VERSION);
2725
}
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
Checking if Unicode version changed.
22
If the Unicode version changes are intentional, it should also be updated in the reference at https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md.
3-
Unicode XID version is: (16, 0, 0)
4-
Unicode normalization version is: (16, 0, 0)
3+
Unicode version used in rustc_parse is: (17, 0, 0)

0 commit comments

Comments
 (0)