Skip to content

Commit

Permalink
Merge pull request #1505 from sdroege/unicollate-non-utf8
Browse files Browse the repository at this point in the history
glib: Collation keys are not necessarily valid UTF-8
  • Loading branch information
sdroege authored Sep 9, 2024
2 parents d3aa5fc + 8c44ec1 commit d34db6b
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 11 deletions.
87 changes: 76 additions & 11 deletions glib/src/unicollate.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// Take a look at the license at the top of the repository in the LICENSE file.

use crate::{ffi, translate::*};
use crate::ffi;

// rustdoc-stripper-ignore-next
/// A `CollationKey` allows ordering strings using the linguistically correct rules for the current locale.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CollationKey(crate::GString);
pub struct CollationKey(crate::Slice<u8>);

impl<T: AsRef<str>> From<T> for CollationKey {
// rustdoc-stripper-ignore-next
Expand All @@ -15,10 +15,10 @@ impl<T: AsRef<str>> From<T> for CollationKey {
fn from(s: T) -> Self {
let s = s.as_ref();
let key = unsafe {
from_glib_full(ffi::g_utf8_collate_key(
s.as_ptr() as *const _,
s.len() as isize,
))
let ptr = ffi::g_utf8_collate_key(s.as_ptr() as *const _, s.len() as isize);
let len = libc::strlen(ptr);

crate::Slice::from_glib_full_num(ptr as *mut u8, len)
};
Self(key)
}
Expand All @@ -29,7 +29,7 @@ impl<T: AsRef<str>> From<T> for CollationKey {
/// Compared to `CollationKey`, filename collation keys take into consideration dots and other characters
/// commonly found in file names.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct FilenameCollationKey(crate::GString);
pub struct FilenameCollationKey(crate::Slice<u8>);

impl<T: AsRef<str>> From<T> for FilenameCollationKey {
// rustdoc-stripper-ignore-next
Expand All @@ -39,10 +39,11 @@ impl<T: AsRef<str>> From<T> for FilenameCollationKey {
fn from(s: T) -> Self {
let s = s.as_ref();
let key = unsafe {
from_glib_full(ffi::g_utf8_collate_key_for_filename(
s.as_ptr() as *const _,
s.len() as isize,
))
let ptr =
ffi::g_utf8_collate_key_for_filename(s.as_ptr() as *const _, s.len() as isize);
let len = libc::strlen(ptr);

crate::Slice::from_glib_full_num(ptr as *mut u8, len)
};
Self(key)
}
Expand Down Expand Up @@ -71,6 +72,25 @@ mod tests {
assert_eq!(unsorted, sorted);
}

// Sorting by `CollationKey` must bring these non-ASCII (Japanese) strings
// into the expected order.
#[test]
fn collate_non_ascii() {
    let mut unsorted = vec![
        String::from("猫の手も借りたい"),
        String::from("日本語は難しい"),
        String::from("ありがとう"),
    ];

    let sorted = vec![
        String::from("ありがとう"),
        String::from("日本語は難しい"),
        String::from("猫の手も借りたい"),
    ];

    // Build every collation key once instead of two keys per comparison.
    // `sort_by_cached_key` is stable, so the resulting order is the same as
    // with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|s| CollationKey::from(s.as_str()));

    assert_eq!(unsorted, sorted);
}

#[test]
fn collate_filenames() {
let mut unsorted = vec![
Expand All @@ -91,4 +111,49 @@ mod tests {

assert_eq!(unsorted, sorted);
}

// Sorting by `FilenameCollationKey` must bring these non-ASCII (Japanese)
// file names, each with an extension, into the expected order.
#[test]
fn collate_filenames_non_ascii() {
    let mut unsorted = vec![
        String::from("猫の手も借りたい.foo"),
        String::from("日本語は難しい.bar"),
        String::from("ありがとう.baz"),
    ];

    let sorted = vec![
        String::from("ありがとう.baz"),
        String::from("日本語は難しい.bar"),
        String::from("猫の手も借りたい.foo"),
    ];

    // Build every collation key once instead of two keys per comparison.
    // `sort_by_cached_key` is stable, so the resulting order is the same as
    // with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|s| FilenameCollationKey::from(s.as_str()));

    assert_eq!(unsorted, sorted);
}

// Sorting `PathBuf`s by the `FilenameCollationKey` of their lossy string
// form must bring these non-ASCII (Japanese) file names into the expected
// order.
#[test]
fn collate_filenames_from_path() {
    use std::path::PathBuf;

    let mut unsorted = vec![
        PathBuf::from("猫の手も借りたい.foo"),
        PathBuf::from("日本語は難しい.bar"),
        PathBuf::from("ありがとう.baz"),
    ];

    let sorted = vec![
        PathBuf::from("ありがとう.baz"),
        PathBuf::from("日本語は難しい.bar"),
        PathBuf::from("猫の手も借りたい.foo"),
    ];

    // Convert each path and build its collation key once, rather than
    // redoing the lossy conversion and key construction for both operands of
    // every comparison. `sort_by_cached_key` is stable, so the resulting
    // order is the same as with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|path| FilenameCollationKey::from(path.to_string_lossy()));

    assert_eq!(unsorted, sorted);
}
}
98 changes: 98 additions & 0 deletions glib/tests/unicollate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use glib::{CollationKey, FilenameCollationKey};

// Shared setup for the tests in this file: switches the process locale away
// from the default "C" locale, at most once no matter how many tests call it.
fn init() {
    use std::sync::Once;

    static LOCALE_SETUP: Once = Once::new();

    LOCALE_SETUP.call_once(|| {
        // The empty, NUL-terminated locale name asks for the system locale,
        // so the tests do not run with the "C" locale.
        unsafe {
            libc::setlocale(libc::LC_ALL, b"\0".as_ptr().cast());
        }
    });
}

// Sorting by `CollationKey` must bring three plain ASCII strings into
// alphabetical order.
#[test]
fn collate() {
    init();

    let mut unsorted = vec![
        String::from("bcd"),
        String::from("cde"),
        String::from("abc"),
    ];

    let sorted = vec![
        String::from("abc"),
        String::from("bcd"),
        String::from("cde"),
    ];

    // Build every collation key once instead of two keys per comparison.
    // `sort_by_cached_key` is stable, so the resulting order is the same as
    // with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|s| CollationKey::from(s.as_str()));

    assert_eq!(unsorted, sorted);
}

// Sorting by `CollationKey` must bring these non-ASCII (Japanese) strings
// into the expected order once the locale has been set up by `init`.
#[test]
fn collate_non_ascii() {
    init();

    let mut unsorted = vec![
        String::from("猫の手も借りたい"),
        String::from("日本語は難しい"),
        String::from("ありがとう"),
    ];

    let sorted = vec![
        String::from("ありがとう"),
        String::from("日本語は難しい"),
        String::from("猫の手も借りたい"),
    ];

    // Build every collation key once instead of two keys per comparison.
    // `sort_by_cached_key` is stable, so the resulting order is the same as
    // with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|s| CollationKey::from(s.as_str()));

    assert_eq!(unsorted, sorted);
}

// Sorting by `FilenameCollationKey` must order ASCII file names by their
// stem here; the extensions alone would give a different order.
#[test]
fn collate_filenames() {
    init();

    let mut unsorted = vec![
        String::from("bcd.a"),
        String::from("cde.b"),
        String::from("abc.c"),
    ];

    let sorted = vec![
        String::from("abc.c"),
        String::from("bcd.a"),
        String::from("cde.b"),
    ];

    // Build every collation key once instead of two keys per comparison.
    // `sort_by_cached_key` is stable, so the resulting order is the same as
    // with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|s| FilenameCollationKey::from(s.as_str()));

    assert_eq!(unsorted, sorted);
}

// Sorting by `FilenameCollationKey` must bring these non-ASCII (Japanese)
// file names, each with an extension, into the expected order once the
// locale has been set up by `init`.
#[test]
fn collate_filenames_non_ascii() {
    init();

    let mut unsorted = vec![
        String::from("猫の手も借りたい.foo"),
        String::from("日本語は難しい.bar"),
        String::from("ありがとう.baz"),
    ];

    let sorted = vec![
        String::from("ありがとう.baz"),
        String::from("日本語は難しい.bar"),
        String::from("猫の手も借りたい.foo"),
    ];

    // Build every collation key once instead of two keys per comparison.
    // `sort_by_cached_key` is stable, so the resulting order is the same as
    // with a `sort_by` comparator.
    unsorted.sort_by_cached_key(|s| FilenameCollationKey::from(s.as_str()));

    assert_eq!(unsorted, sorted);
}

0 comments on commit d34db6b

Please sign in to comment.