Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions unic/char/property/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
// except according to those terms.

#![no_std]
#![forbid(bad_style, future_incompatible, missing_debug_implementations, missing_docs,
unconditional_recursion, unsafe_code, unused)]
// #![forbid(bad_style, future_incompatible, missing_debug_implementations, missing_docs,
// unconditional_recursion, unsafe_code, unused)]

//! # UNIC — Unicode Character Tools - Character Property
//!
Expand Down
2 changes: 1 addition & 1 deletion unic/char/property/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ macro_rules! char_property {
impl $prop_name {
/// Get (struct) property value of the character.
pub fn of(ch: char) -> Self {
use $crate::tables::CharDataTable;
use $crate::tables::{CharDataTable, TCharDataTable};
const TABLE: CharDataTable<()> = include!($data_path);
$prop_name(TABLE.contains(ch))
}
Expand Down
92 changes: 86 additions & 6 deletions unic/char/property/src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,85 @@

//! Character data tables used in UNIC.

use core::ops::Range;

use unic_char_range::CharRange;

/// Common lookup interface shared by the character data table types.
pub trait TCharDataTable {
/// The type of the data associated with a character in the table.
type Item;
/// Does this table contain a mapping for the character `needle`?
fn contains(&self, needle: char) -> bool;
/// Find the data associated with the character `needle`, or `None` if the
/// table has no mapping for it.
fn find(&self, needle: char) -> Option<Self::Item>;
}

/// A character data table stored as a slice of `(char, value)` entries.
///
/// Lookups binary-search the slice by character, so the entries must be
/// sorted by their `char` key in ascending order.
#[derive(Debug)]
pub struct CharDataTableDirect<V: 'static>(#[doc(hidden)] pub &'static [(char, V)]);

impl<V: 'static> Default for CharDataTableDirect<V> {
    /// Creates a table with no entries.
    fn default() -> CharDataTableDirect<V> {
        CharDataTableDirect(&[])
    }
}
impl<V: 'static + Copy> TCharDataTable for CharDataTableDirect<V> {
    type Item = V;

    /// Does this table contain an entry keyed by `needle`?
    fn contains(&self, needle: char) -> bool {
        let entries = self.0;
        entries
            .binary_search_by_key(&needle, |entry| entry.0)
            .is_ok()
    }

    /// Find the value stored for `needle`, if the table has an entry for it.
    fn find(&self, needle: char) -> Option<V> {
        let entries = self.0;
        match entries.binary_search_by_key(&needle, |entry| entry.0) {
            Ok(position) => Some(entries[position].1),
            Err(_) => None,
        }
    }
}

/// A character data table stored as a slice of `(CharRange, value)` entries.
///
/// Lookups binary-search the slice by comparing each range against the
/// character, so the ranges must be sorted and must not overlap.
#[derive(Debug)]
pub struct CharDataTableRange<V: 'static>(#[doc(hidden)] pub &'static [(CharRange, V)]);

impl<V: 'static> Default for CharDataTableRange<V> {
    /// Creates a table with no entries.
    fn default() -> CharDataTableRange<V> {
        CharDataTableRange(&[])
    }
}
impl<V: 'static + Copy> TCharDataTable for CharDataTableRange<V> {
    type Item = V;

    /// Does any range in this table cover `needle`?
    fn contains(&self, needle: char) -> bool {
        let entries = self.0;
        entries
            .binary_search_by(|&(span, _)| span.cmp(needle))
            .is_ok()
    }

    /// Find the value stored for the range covering `needle`, if there is one.
    fn find(&self, needle: char) -> Option<V> {
        let entries = self.0;
        match entries.binary_search_by(|&(span, _)| span.cmp(needle)) {
            Ok(position) => Some(entries[position].1),
            Err(_) => None,
        }
    }
}

/// A character data table whose values are sub-slices of one shared array.
///
/// Each `table` entry maps a character to a `start..end` index range into
/// `refdata`. Lookups binary-search `table` by character, so its entries must
/// be sorted by their `char` key in ascending order.
#[derive(Debug)]
pub struct CharDataTableDirectSlice<V: 'static> {
    // Both fields are public (but hidden from docs) so that table constants
    // can be written as struct literals outside this module, matching the
    // `#[doc(hidden)] pub` fields of the other table types.
    #[doc(hidden)]
    pub table: &'static [(char, Range<u32>)],
    #[doc(hidden)]
    pub refdata: &'static [V],
}

impl<V: 'static> Default for CharDataTableDirectSlice<V> {
    /// Creates a table with no entries and no backing data.
    fn default() -> CharDataTableDirectSlice<V> {
        CharDataTableDirectSlice {
            table: &[],
            refdata: &[],
        }
    }
}
impl<V: 'static> TCharDataTable for CharDataTableDirectSlice<V> {
    type Item = &'static [V];

    /// Does this table contain an entry keyed by `needle`?
    fn contains(&self, needle: char) -> bool {
        self.table
            .binary_search_by_key(&needle, |entry| entry.0)
            .is_ok()
    }

    /// Find the slice of `refdata` stored for `needle`, if the table has an
    /// entry for it.
    ///
    /// Panics if the index range stored for the entry does not lie within
    /// `refdata`.
    fn find(&self, needle: char) -> Option<&'static [V]> {
        match self.table.binary_search_by_key(&needle, |entry| entry.0) {
            Ok(position) => {
                let span = &self.table[position].1;
                let (start, end) = (span.start as usize, span.end as usize);
                Some(&self.refdata[start..end])
            }
            Err(_) => None,
        }
    }
}

/// A mapping from characters to some associated data.
///
/// For the set case, use `()` as the associated value.
Expand All @@ -31,7 +108,7 @@ impl<V> Default for CharDataTable<V> {

impl<V> CharDataTable<V> {
/// Does this table contain a mapping for a character?
pub fn contains(&self, needle: char) -> bool {
pub fn contains_impl(&self, needle: char) -> bool {
match *self {
CharDataTable::Direct(table) => {
table.binary_search_by_key(&needle, |&(k, _)| k).is_ok()
Expand All @@ -45,7 +122,7 @@ impl<V> CharDataTable<V> {

impl<V: Copy> CharDataTable<V> {
/// Find the associated data for a character in this table.
pub fn find(&self, needle: char) -> Option<V> {
pub fn find_impl(&self, needle: char) -> Option<V> {
match *self {
CharDataTable::Direct(table) => table
.binary_search_by_key(&needle, |&(k, _)| k)
Expand All @@ -59,10 +136,13 @@ impl<V: Copy> CharDataTable<V> {
}
}

impl<V: Copy + Default> CharDataTable<V> {
/// Find the associated data for a character in this table, or the default value if not entered.
pub fn find_or_default(&self, needle: char) -> V {
self.find(needle).unwrap_or_else(Default::default)
impl<V: Copy> TCharDataTable for CharDataTable<V> {
type Item = V;
fn contains(&self, needle: char) -> bool {
self.contains_impl(needle)
}
fn find(&self, needle: char) -> Option<Self::Item> {
self.find_impl(needle)
}
}

Expand Down
7 changes: 1 addition & 6 deletions unic/char/property/tests/tables_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ extern crate unic_char_range;

extern crate unic_char_property;

use unic_char_property::tables::CharDataTable;
use unic_char_property::tables::{CharDataTable, TCharDataTable};

#[test]
fn test_range_value_table() {
Expand All @@ -24,22 +24,17 @@ fn test_range_value_table() {
]);
for ch in chars!('a'..='g') {
assert_eq!(TABLE.find(ch), Some(1));
assert_eq!(TABLE.find_or_default(ch), 1);
}
for ch in chars!('h'..='i') {
assert_eq!(TABLE.find(ch), None);
assert_eq!(TABLE.find_or_default(ch), 0);
}
for ch in chars!('j'..='q') {
assert_eq!(TABLE.find(ch), Some(2));
assert_eq!(TABLE.find_or_default(ch), 2);
}
for ch in chars!('r'..='v') {
assert_eq!(TABLE.find(ch), None);
assert_eq!(TABLE.find_or_default(ch), 0);
}
for ch in chars!('x'..='z') {
assert_eq!(TABLE.find(ch), Some(3));
assert_eq!(TABLE.find_or_default(ch), 3);
}
}
1 change: 1 addition & 0 deletions unic/idna/mapping/src/mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ mod data {
impl Mapping {
/// Get Mapping status of the character.
pub fn of(ch: char) -> Mapping {
use unic_char_property::tables::TCharDataTable;
data::MAPPING.find(ch).expect("Table is missing value")
}
}
Expand Down
1 change: 1 addition & 0 deletions unic/ucd/age/src/age.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ mod data {
impl Age {
/// Find the character *Age* property value.
pub fn of(ch: char) -> Option<Age> {
use unic_char_property::tables::TCharDataTable;
data::AGE_TABLE.find(ch).map(Age)
}

Expand Down
3 changes: 2 additions & 1 deletion unic/ucd/bidi/src/bidi_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ mod data {
impl BidiClass {
/// Find the character `Bidi_Class` property value.
pub fn of(ch: char) -> BidiClass {
data::BIDI_CLASS_TABLE.find_or_default(ch)
use unic_char_property::tables::TCharDataTable;
data::BIDI_CLASS_TABLE.find(ch).unwrap_or_default()
}

/// If the `BidiClass` has strong or explicit Left-to-Right direction.
Expand Down
3 changes: 2 additions & 1 deletion unic/ucd/category/src/category.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ mod data {
impl GeneralCategory {
/// Find the `GeneralCategory` of a single char.
pub fn of(ch: char) -> GeneralCategory {
data::GENERAL_CATEGORY_TABLE.find_or_default(ch)
use unic_char_property::tables::TCharDataTable;
data::GENERAL_CATEGORY_TABLE.find(ch).unwrap_or_default()
}
}

Expand Down
5 changes: 3 additions & 2 deletions unic/ucd/name/src/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub enum Name {
#[cfg_attr(feature = "clippy", allow(len_without_is_empty))]
impl Name {
pub fn of(ch: char) -> Option<Name> {
use unic_char_property::tables::TCharDataTable;
match ch {
'\u{AC00}'...'\u{D7A3}' => Some(Name::NR1(ch)),
'\u{3400}'...'\u{4DB5}'
Expand Down Expand Up @@ -145,7 +146,7 @@ impl PartialOrd for Name {
}

mod data {
use unic_char_property::tables::CharDataTable;
use unic_char_property::tables::CharDataTableDirect;
include!("../tables/name_values.rsd");
pub const NAMES: CharDataTable<&[&str]> = include!("../tables/name_map.rsv");
pub const NAMES: CharDataTableDirect<&[&str]> = include!("../tables/name_map.rsv");
}
2 changes: 1 addition & 1 deletion unic/ucd/name/tables/name_map.rsv

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion unic/ucd/normal/src/canonical_combining_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ impl CanonicalCombiningClass {

/// Find the character `Canonical_Combining_Class` property value.
pub fn of(ch: char) -> CanonicalCombiningClass {
data::CANONICAL_COMBINING_CLASS_VALUES.find_or_default(ch)
use unic_char_property::tables::TCharDataTable;
data::CANONICAL_COMBINING_CLASS_VALUES.find(ch).unwrap_or_default()
}

// == Named values ==
Expand Down
6 changes: 3 additions & 3 deletions unic/ucd/normal/src/composition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use unic_char_property::tables::CharDataTable;
use unic_char_property::tables::{CharDataTable, TCharDataTable};

pub mod data {
use DecompositionType;
use decomposition_type::long_names::*;
use unic_char_property::tables::CharDataTable;
use unic_char_property::tables::{CharDataTable, CharDataTableDirect};

pub const CANONICAL_COMPOSITION_MAPPING: CharDataTable<CharDataTable<char>> =
include!("../tables/canonical_composition_mapping.rsv");

pub const CANONICAL_DECOMPOSITION_MAPPING: CharDataTable<&[char]> =
pub const CANONICAL_DECOMPOSITION_MAPPING: CharDataTableDirect<&[char]> =
include!("../tables/canonical_decomposition_mapping.rsv");

#[cfg_attr(rustfmt, rustfmt_skip)]
Expand Down
1 change: 1 addition & 0 deletions unic/ucd/normal/src/decomposition_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ impl PartialCharProperty for DecompositionType {
impl DecompositionType {
/// Find the DecompositionType of the character.
pub fn of(ch: char) -> Option<DecompositionType> {
use unic_char_property::tables::TCharDataTable;
// First, check for Hangul Syllables and other canonical decompositions
if hangul::is_syllable(ch) || canonical_decomposition(ch).is_some() {
return Some(DecompositionType::Canonical);
Expand Down
1 change: 1 addition & 0 deletions unic/ucd/normal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ mod hangul;
/// See [Unicode Standard Annex #15](https://www.unicode.org/reports/tr15/)
/// for more information.
pub fn compose(a: char, b: char) -> Option<char> {
use unic_char_property::tables::TCharDataTable;
hangul::compose(a, b).or_else(|| canonical_composition(a).and_then(|table| table.find(b)))
}

Expand Down
2 changes: 1 addition & 1 deletion unic/ucd/normal/tables/canonical_decomposition_mapping.rsv

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion unic/ucd/segment/src/grapheme_cluster_break.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,8 @@ mod data {
impl GraphemeClusterBreak {
/// Find the character *Grapheme_Cluster_Break* property value.
pub fn of(ch: char) -> GraphemeClusterBreak {
data::GRAPHEME_CLUSTER_BREAK_TABLE.find_or_default(ch)
use unic_char_property::tables::TCharDataTable;
data::GRAPHEME_CLUSTER_BREAK_TABLE.find(ch).unwrap_or_default()
}
}

Expand Down
3 changes: 2 additions & 1 deletion unic/ucd/segment/src/sentence_break.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ mod data {
impl SentenceBreak {
/// Find the character *Sentence_Break* property value.
pub fn of(ch: char) -> SentenceBreak {
data::SENTENCE_BREAK_TABLE.find_or_default(ch)
use unic_char_property::tables::TCharDataTable;
data::SENTENCE_BREAK_TABLE.find(ch).unwrap_or_default()
}
}

Expand Down
3 changes: 2 additions & 1 deletion unic/ucd/segment/src/word_break.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ mod data {
impl WordBreak {
/// Find the character *Word_Break* property value.
pub fn of(ch: char) -> WordBreak {
data::WORD_BREAK_TABLE.find_or_default(ch)
use unic_char_property::tables::TCharDataTable;
data::WORD_BREAK_TABLE.find(ch).unwrap_or_default()
}
}

Expand Down