Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- run: cargo build
- run: cargo build --features unicode_names2
- run: cargo test
220 changes: 220 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ readme = "README.md"
license = "Apache-2.0"
categories = ["encoding", "text-processing"]
keywords = ["unicode", "symbols"]

[build-dependencies]
unicode_names2 = { version = "1.3.0", optional = true }
29 changes: 24 additions & 5 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ fn tokenize(line: &str) -> StrResult<Line> {
return Ok(Line::Blank);
}

let (head, tail) = match line.split_once(' ') {
Some((a, b)) => (a, Some(b)),
let (head, tail) = match line.split_once(char::is_whitespace) {
Some((a, b)) => (a, Some(b.trim_start())),
None => (line, None),
};

Expand Down Expand Up @@ -121,10 +121,29 @@ fn validate_ident(string: &str) -> StrResult<()> {
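/// Extracts either a single char or parses a `U+XXXX` escape. The escape may
/// be followed by whitespace and the character's Unicode name; when the
/// `unicode_names2` feature is enabled, a name that does not match the code
/// point is reported as an error.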
fn decode_char(text: &str) -> StrResult<char> {
if let Some(hex) = text.strip_prefix("U+") {
u32::from_str_radix(hex, 16)
let (hex, name) = match hex.split_once(char::is_whitespace) {
Some((hex, name)) => (hex, Some(name.trim_start())),
None => (hex, None),
};

let ch = u32::from_str_radix(hex, 16)
.ok()
.and_then(|n| char::try_from(n).ok())
.ok_or_else(|| format!("invalid unicode escape {text:?}"))
.and_then(|n| char::from_u32(n))
.ok_or_else(|| format!("invalid unicode escape {hex:?}"))?;

#[allow(unused_variables)]
if let Some(name) = name {
#[cfg(feature = "unicode_names2")]
if unicode_names2::character(name) != Some(ch) {
return Err(format!(
"Incorrect name supplied for character U+{hex}: '{name}'{}",
unicode_names2::name(ch)
.map_or("".to_string(), |name| format!(" (expected {name})"))
));
}
}

Ok(ch)
} else {
let mut chars = text.chars();
match (chars.next(), chars.next()) {
Expand Down
56 changes: 28 additions & 28 deletions src/modules/sym.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
// Control.
wj U+2060
zwj U+200D
zwnj U+200C
zws U+200B
lrm U+200E
rlm U+200F
// Layout control.
wj U+2060 Word joiner
zwj U+200D Zero width joiner
zwnj U+200C Zero width non-joiner
zws U+200B Zero width space
lrm U+200E Left-to-right mark
rlm U+200F Right-to-left mark

// Spaces.
space U+20
.nobreak U+A0
.nobreak.narrow U+202F
.en U+2002
.quad U+2003
.third U+2004
.quarter U+2005
.sixth U+2006
.med U+205F
.fig U+2007
.punct U+2008
.thin U+2009
.hair U+200A
space U+20 Space
.nobreak U+A0 No-break space
.nobreak.narrow U+202F Narrow no-break space
.en U+2002 En space
.quad U+2003 Em space
.third U+2004 Three-per-em space
.quarter U+2005 Four-per-em space
.sixth U+2006 Six-per-em space
.fig U+2007 Figure space
.punct U+2008 Punctuation space
.thin U+2009 Thin space
.hair U+200A Hair space
.med U+205F Medium mathematical space

// Delimiters.
paren
Expand All @@ -30,9 +30,9 @@ paren
.t ⏜
.b ⏝
brace
.l U+7B
.l U+7B Left curly bracket
.l.double ⦃
.r U+7D
.r U+7D Right curly bracket
.r.double ⦄
.t ⏞
.b ⏟
Expand Down Expand Up @@ -130,14 +130,14 @@ dash
.wave.double 〰
dot
.op ⋅
.basic U+2E
.basic U+2E Full stop
.c ·
.circle ⊙
.circle.big ⨀
.square ⊡
.double ¨
.triple U+20DB
.quad U+20DC
.triple U+20DB Combining three dots above
.quad U+20DC Combining four dots above
excl !
.double ‼
.inv ¡
Expand All @@ -149,10 +149,10 @@ quest ?
interrobang ‽
hash #
hyph ‐
.minus U+2D
.nobreak U+2011
.minus U+2D Hyphen-minus
.nobreak U+2011 Non-breaking hyphen
.soft U+AD Soft hyphen
.point ‧
.soft U+AD
numero №
percent %
permille ‰
Expand Down