Skip to content

Commit 9dfe9f6

Browse files
committed
[ucd/unihan] Update to Rust 2018
1 parent 95446b4 commit 9dfe9f6

File tree

12 files changed

+84
-65
lines changed

12 files changed

+84
-65
lines changed

gen/src/source/ucd/unihan/mod.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ use regex::Regex;
1919

2020
lazy_static! {
2121
pub static ref UNIHAN_DATA_ENTRY_REGEX: Regex = Regex::new(
22-
r"(?xm)^ # every line
22+
r"(?xm)^ # every line
2323
U\+([[:xdigit:]]{4,6}) # [1]codepoint
2424
\t # separator
2525
(k[a-zA-Z0-9_]+) # [2]field key
2626
\t # separator
2727
(.*) # [3]field value
2828
",
29-
).unwrap();
29+
)
30+
.unwrap();
3031
}
3132

3233
pub trait DataEntry {
@@ -36,7 +37,7 @@ pub trait DataEntry {
3637

3738
pub fn parse_entries_from_str<T>(str: &str) -> Vec<T>
3839
where
39-
T: DataEntry + Clone
40+
T: DataEntry + Clone,
4041
{
4142
let mut entry_map: BTreeMap<char, T> = BTreeMap::default();
4243

@@ -52,11 +53,11 @@ where
5253
let mut entry = T::new(chr);
5354
entry.update(key, value);
5455
entry_map.insert(chr, entry);
55-
},
56+
}
5657
Some(_) => {
57-
let mut entry = entry_map.get_mut(&chr).unwrap();
58+
let entry = entry_map.get_mut(&chr).unwrap();
5859
entry.update(key, value);
59-
},
60+
}
6061
}
6162
}
6263

gen/src/source/ucd/unihan/numeric_values.rs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010

1111
use std::str::FromStr;
1212

13-
use source::utils::read;
13+
use crate::source::utils::read;
1414

15-
use super::{DataEntry, parse_entries_from_str};
15+
use super::{parse_entries_from_str, DataEntry};
1616

1717
lazy_static! {
1818
/// [Numeric values]: http://www.unicode.org/reports/tr38/#N1024D
1919
pub static ref UNIHAN_NUMERIC_VALUES_DATA: NumericValuesData = {
20-
read("data/ucd/Unihan/Unihan_NumericValues.txt").parse().unwrap()
20+
read("external/unicode/ucd/data/Unihan/Unihan_NumericValues.txt").parse().unwrap()
2121
};
2222
}
2323

@@ -44,14 +44,14 @@ impl DataEntry for NumericValuesDataEntry {
4444
"kAccountingNumeric" => self.accounting_numeric = value.parse::<u64>().ok(),
4545
"kOtherNumeric" => self.other_numeric = value.parse::<u64>().ok(),
4646
"kPrimaryNumeric" => self.primary_numeric = value.parse::<u64>().ok(),
47-
_ => {},
47+
_ => {}
4848
}
4949
}
5050
}
5151

5252
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
5353
pub struct NumericValuesData {
54-
pub entries: Box<[NumericValuesDataEntry]>,
54+
pub entries: Box<[NumericValuesDataEntry]>,
5555
}
5656

5757
impl FromStr for NumericValuesData {
@@ -80,17 +80,14 @@ mod test {
8080
let mut entry3 = NumericValuesDataEntry::new('\u{5146}');
8181
entry3.primary_numeric = Some(1000000000000);
8282

83-
let entries = vec![
84-
entry1,
85-
entry2,
86-
entry3
87-
];
83+
let entries = vec![entry1, entry2, entry3];
8884

8985
assert_eq!(
9086
"U+3405 kOtherNumeric 5\n\
9187
U+4EDF kAccountingNumeric 1000\n\
9288
U+5146 kPrimaryNumeric 1000000000000\n\
93-
".parse(),
89+
"
90+
.parse(),
9491
Ok(NumericValuesData {
9592
entries: entries.into_boxed_slice(),
9693
}),

gen/src/source/ucd/unihan/readings.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010

1111
use std::str::FromStr;
1212

13-
use source::utils::read;
13+
use crate::source::utils::read;
1414

15-
use super::{DataEntry, parse_entries_from_str};
15+
use super::{parse_entries_from_str, DataEntry};
1616

1717
lazy_static! {
1818
/// [Readings]: http://www.unicode.org/reports/tr38/#N1019C
1919
pub static ref UNIHAN_READINGS_DATA: ReadingsData = {
20-
read("data/ucd/Unihan/Unihan_Readings.txt").parse().unwrap()
20+
read("external/unicode/ucd/data/Unihan/Unihan_Readings.txt").parse().unwrap()
2121
};
2222
}
2323

@@ -71,7 +71,7 @@ impl DataEntry for ReadingsDataEntry {
7171
"kTang" => self.tang = Some(value.to_owned()),
7272
"kVietnamese" => self.vietnamese = Some(value.to_owned()),
7373
"kXHC1983" => self.xhc_1983 = Some(value.to_owned()),
74-
_ => {},
74+
_ => {}
7575
}
7676
}
7777
}

gen/src/source/ucd/unihan/variants.rs

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ use std::str::FromStr;
1313

1414
use regex::Regex;
1515

16-
use source::utils::read;
16+
use crate::source::utils::read;
1717

18-
use super::{DataEntry, parse_entries_from_str};
18+
use super::{parse_entries_from_str, DataEntry};
1919

2020
lazy_static! {
2121
/// [Variants]: http://www.unicode.org/reports/tr38/#N10211
2222
pub static ref UNIHAN_VARIANTS_DATA: VariantsData = {
23-
read("data/ucd/Unihan/Unihan_Variants.txt").parse().unwrap()
23+
read("external/unicode/ucd/data/Unihan/Unihan_Variants.txt").parse().unwrap()
2424
};
2525

2626
pub static ref VALUE_REGEX: Regex = Regex::new(
@@ -75,20 +75,24 @@ impl DataEntry for VariantsDataEntry {
7575

7676
fn update<'a>(&mut self, key: &'a str, value: &'a str) {
7777
match key {
78-
"kSemanticVariant" =>
78+
"kSemanticVariant" => {
7979
self.semantic_variants =
80-
Some(VariantsDataEntry::parse_values_with_additional_data(value)),
81-
"kSimplifiedVariant" =>
82-
self.simplified_variant = Some(VariantsDataEntry::parse_value(value)),
83-
"kSpecializedSemanticVariant" =>
80+
Some(VariantsDataEntry::parse_values_with_additional_data(value))
81+
}
82+
"kSimplifiedVariant" => {
83+
self.simplified_variant = Some(VariantsDataEntry::parse_value(value))
84+
}
85+
"kSpecializedSemanticVariant" => {
8486
self.specialized_semantic_variants =
85-
Some(VariantsDataEntry::parse_values_with_additional_data(value)),
86-
"kTraditionalVariant" =>
87-
self.traditional_variant = Some(VariantsDataEntry::parse_value(value)),
88-
"kZVariant" =>
89-
self.z_variants =
90-
Some(VariantsDataEntry::parse_values_with_additional_data(value)),
91-
_ => {},
87+
Some(VariantsDataEntry::parse_values_with_additional_data(value))
88+
}
89+
"kTraditionalVariant" => {
90+
self.traditional_variant = Some(VariantsDataEntry::parse_value(value))
91+
}
92+
"kZVariant" => {
93+
self.z_variants = Some(VariantsDataEntry::parse_values_with_additional_data(value))
94+
}
95+
_ => {}
9296
}
9397
}
9498
}

gen/src/writer/ucd/unihan.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,18 @@
1111
use std::collections::BTreeMap;
1212
use std::path::Path;
1313

14-
use source::ucd::readme::UNICODE_VERSION;
15-
use source::ucd::unihan::numeric_values::UNIHAN_NUMERIC_VALUES_DATA;
16-
use source::ucd::unihan::readings::UNIHAN_READINGS_DATA;
17-
use source::ucd::unihan::variants::UNIHAN_VARIANTS_DATA;
14+
use crate::source::ucd::{
15+
readme::UNICODE_VERSION,
16+
unihan::{
17+
numeric_values::UNIHAN_NUMERIC_VALUES_DATA, readings::UNIHAN_READINGS_DATA,
18+
variants::UNIHAN_VARIANTS_DATA,
19+
},
20+
};
1821

19-
use writer::common::emit_unicode_version;
20-
use writer::utils::tables::ToDirectCharTable;
21-
use writer::utils::write;
22+
use crate::writer::{
23+
common::emit_unicode_version,
24+
utils::{tables::ToDirectCharTable, write},
25+
};
2226

2327
pub fn generate(dir: &Path) {
2428
emit_unicode_version(dir, &UNICODE_VERSION);
@@ -214,7 +218,8 @@ fn emit_unihan_variants_tables(dir: &Path) {
214218
write(
215219
dir,
216220
"specialized_semantic_variants_map.rsv",
217-
&specialized_semantic_variants_map.to_direct_char_table(|record, f| write!(f, "{:?}", record)),
221+
&specialized_semantic_variants_map
222+
.to_direct_char_table(|record, f| write!(f, "{:?}", record)),
218223
);
219224
write(
220225
dir,

unic/ucd/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pub use unic_ucd_ident as ident;
3737
pub use unic_ucd_name as name;
3838
pub use unic_ucd_normal as normal;
3939
pub use unic_ucd_segment as segment;
40+
pub use unic_ucd_unihan as unihan;
4041

4142
pub use crate::version::UnicodeVersion;
4243

@@ -80,5 +81,7 @@ pub use crate::normal::CanonicalCombiningClass;
8081

8182
pub use crate::segment::{GraphemeClusterBreak, SentenceBreak, WordBreak};
8283

84+
pub use crate::unihan::{definition_of, mandarin_of, simplified_variant_of, traditional_variant_of};
85+
8386
mod pkg_info;
8487
pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};

unic/ucd/unihan/Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[package]
22
name = "unic-ucd-unihan"
3-
version = "0.7.0"
3+
version = "0.8.0"
4+
edition = "2018"
45
authors = ["The UNIC Project Developers"]
56
repository = "https://github.com/behnam/rust-unic/"
67
license = "MIT/Apache-2.0"
@@ -12,8 +13,8 @@ categories = ["internationalization", "text-processing", "parsing", "rendering"]
1213
exclude = []
1314

1415
[dependencies]
15-
unic-ucd-version = { path = "../version/", version = "0.7.0" }
16-
unic-char-property = { path = "../../char/property/", version = "0.7.0" }
16+
unic-char-property = { path = "../../char/property/", version = "0.8.0" }
17+
unic-ucd-version = { path = "../version/", version = "0.8.0" }
1718

1819
[badges]
1920
maintenance = { status = "actively-developed" }

unic/ucd/unihan/src/lib.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,30 @@
1010
// except according to those terms.
1111

1212
#![no_std]
13-
#![forbid(future_incompatible, missing_debug_implementations, unconditional_recursion, unsafe_code)]
14-
#![deny(bad_style, unsafe_code, unused)]
13+
#![warn(
14+
bad_style,
15+
missing_debug_implementations,
16+
missing_docs,
17+
unconditional_recursion
18+
)]
19+
#![forbid(unsafe_code)]
1520

16-
extern crate unic_ucd_version;
17-
extern crate unic_char_property;
21+
//! # UNIC - UCD - Unihan
22+
//!
23+
//! A component of [`unic`: Unicode and Internationalization Crates for Rust](/unic/).
24+
//!
25+
//! Accessors for the Unicode Han Database (Unihan).
1826
1927
mod readings;
20-
pub use readings::{definition_of, mandarin_of};
28+
pub use crate::readings::{definition_of, mandarin_of};
2129

2230
mod variants;
23-
pub use variants::{simplified_variant_of, traditional_variant_of};
31+
pub use crate::variants::{simplified_variant_of, traditional_variant_of};
2432

2533
use unic_ucd_version::UnicodeVersion;
2634

2735
mod pkg_info;
28-
pub use pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};
36+
pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};
2937

3038
/// The [Unicode version](https://www.unicode.org/versions/) of data
3139
pub const UNICODE_VERSION: UnicodeVersion = include!("../tables/unicode_version.rsv");

unic/ucd/unihan/src/readings.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
// option. This file may not be copied, modified, or distributed
1010
// except according to those terms.
1111

12+
/// An English definition for the given character
1213
pub fn definition_of(ch: char) -> Option<&'static str> {
1314
data::DEFINITIONS.find(ch)
1415
}
1516

17+
/// The most customary pinyin reading for the given character
1618
pub fn mandarin_of(ch: char) -> Option<&'static str> {
1719
// TODO: When there are two values, then the first is preferred for
1820
// zh-Hans (CN) and the second is preferred for zh-Hant (TW).

unic/ucd/unihan/src/variants.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
// option. This file may not be copied, modified, or distributed
1010
// except according to those terms.
1111

12+
/// The simplified Chinese variant of the given character, or `None` if no variant is found
1213
pub fn simplified_variant_of(ch: char) -> Option<char> {
1314
data::SIMPLIFIED_VARIANT.find(ch)
1415
}
1516

17+
/// The traditional Chinese variant of the given character, or `None` if no variant is found
1618
pub fn traditional_variant_of(ch: char) -> Option<char> {
1719
data::TRADITIONAL_VARIANT.find(ch)
1820
}

0 commit comments

Comments
 (0)