From b61cdf52f3ccce9b2b9dab8bb8e6b96ea40b84ba Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Mon, 28 Oct 2024 19:14:30 +0100 Subject: [PATCH 1/4] decoder: improve version mismatch error message, don't mention probe-run. --- decoder/src/elf2table/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decoder/src/elf2table/mod.rs b/decoder/src/elf2table/mod.rs index 7c7d3f48b..a88b63209 100644 --- a/decoder/src/elf2table/mod.rs +++ b/decoder/src/elf2table/mod.rs @@ -229,7 +229,7 @@ pub fn parse_impl(elf: &[u8], check_version: bool) -> Result, anyh fn check_version(version: &str) -> Result<(), String> { if !DEFMT_VERSIONS.contains(&version) { let msg = format!( - "defmt wire format version mismatch: firmware is using {}, `probe-run` supports {}\nsuggestion: use a newer version of `defmt` or `cargo install` a different version of `probe-run` that supports defmt {}", + "defmt wire format version mismatch: firmware is using {}, this tool supports {}\nsuggestion: install a newer version of this tool that supports defmt wire format version {}", version, DEFMT_VERSIONS.join(", "), version ); From 072d481a28e5dfd7f887abcfa0f6959d9b8a2cf0 Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Mon, 28 Oct 2024 19:16:00 +0100 Subject: [PATCH 2/4] Hex-encode defmt symbols to avoid compatibility issues. defmt currently puts json as-is in symbol names, which can contain special characters not normally found in symbol names like quotes `"`, braces `{}` and spaces ` `. This can cause compatibility issues with language features or tools that don't expect this. For example it breaks `sym` in `asm!`. This is a *breaking change* of the wire format, so this PR increases the format numbre. `defmt-decoder` is updated to be able to decode the new format, while keeping ability to decode older formats. --- CHANGELOG.md | 1 + decoder/src/elf2table/symbol.rs | 29 ++++++++++++++++++++++++++++- decoder/src/lib.rs | 4 ++-- defmt/src/lib.rs | 2 +- macros/src/construct/symbol.rs | 13 +++++++++++-- 5 files changed, 43 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f34f554f..1ea6e7d35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] - [#880]: Merge function calls emitted by the macro to save space. +- [#878]: Hex-encode defmt symbols to avoid compatibility issues. - [#874]: `defmt`: Fix doc test - [#872]: `defmt`: Add `expect!` as alias for `unwrap!` for discoverability - [#871]: Set MSRV to Rust 1.76 diff --git a/decoder/src/elf2table/symbol.rs b/decoder/src/elf2table/symbol.rs index ce35d4e1b..bc0ff32b7 100644 --- a/decoder/src/elf2table/symbol.rs +++ b/decoder/src/elf2table/symbol.rs @@ -1,3 +1,6 @@ +use std::borrow::Cow; + +use anyhow::bail; use serde::Deserialize; use crate::Tag; @@ -38,7 +41,11 @@ pub(super) enum SymbolTag { impl Symbol { pub fn demangle(raw: &str) -> anyhow::Result { - serde_json::from_str(raw) + let mut raw = Cow::from(raw); + if let Some(s) = raw.strip_prefix("__defmt_hex_") { + raw = Cow::from(hex_decode(s)?); + } + serde_json::from_str(&raw) .map_err(|j| anyhow::anyhow!("failed to demangle defmt symbol `{}`: {}", raw, j)) } @@ -77,3 +84,23 @@ impl Symbol { self.crate_name.as_deref() } } + +fn hex_decode_digit(c: u8) -> anyhow::Result { + match c { + b'0'..=b'9' => Ok(c - b'0'), + b'a'..=b'f' => Ok(c - b'a' + 0xa), + _ => bail!("invalid hex char '{c}'"), + } +} + +fn hex_decode(s: &str) -> anyhow::Result { + let s = s.as_bytes(); + if s.len() % 2 != 0 { + bail!("invalid hex: length must be even") + } + let mut res = vec![0u8; s.len() / 2]; + for i in 0..(s.len() / 2) { + res[i] = (hex_decode_digit(s[i * 2])? << 4) | hex_decode_digit(s[i * 2 + 1])?; + } + Ok(String::from_utf8(res)?) +} diff --git a/decoder/src/lib.rs b/decoder/src/lib.rs index affc01e71..9f893a206 100644 --- a/decoder/src/lib.rs +++ b/decoder/src/lib.rs @@ -7,10 +7,10 @@ #![cfg_attr(docsrs, doc(cfg(unstable)))] #![doc(html_logo_url = "https://knurling.ferrous-systems.com/knurling_logo_light_text.svg")] -pub const DEFMT_VERSIONS: &[&str] = &["3", "4"]; +pub const DEFMT_VERSIONS: &[&str] = &["3", "4", "5"]; // To avoid a breaking change, still provide `DEFMT_VERSION`. #[deprecated = "Please use DEFMT_VERSIONS instead"] -pub const DEFMT_VERSION: &str = DEFMT_VERSIONS[1]; +pub const DEFMT_VERSION: &str = DEFMT_VERSIONS[DEFMT_VERSIONS.len() - 1]; mod decoder; mod elf2table; diff --git a/defmt/src/lib.rs b/defmt/src/lib.rs index abffdf8ff..ec1d793e3 100644 --- a/defmt/src/lib.rs +++ b/defmt/src/lib.rs @@ -29,7 +29,7 @@ extern crate alloc; #[used] #[cfg_attr(target_os = "macos", link_section = ".defmt,end.VERSION")] #[cfg_attr(not(target_os = "macos"), link_section = ".defmt.end")] -#[export_name = "_defmt_version_ = 4"] +#[export_name = "_defmt_version_ = 5"] static DEFMT_VERSION: u8 = 0; #[used] diff --git a/macros/src/construct/symbol.rs b/macros/src/construct/symbol.rs index 77eae0b2f..7e2f05dcf 100644 --- a/macros/src/construct/symbol.rs +++ b/macros/src/construct/symbol.rs @@ -52,17 +52,26 @@ impl<'a> Symbol<'a> { } fn mangle(&self) -> String { - format!( + let json = format!( r#"{{"package":"{}","tag":"{}","data":"{}","disambiguator":"{}","crate_name":"{}"}}"#, json_escape(&self.package), json_escape(&self.tag), json_escape(self.data), self.disambiguator, json_escape(&self.crate_name), - ) + ); + format!("__defmt_hex_{}", hex_encode(&json)) } } +fn hex_encode(string: &str) -> String { + let mut res = String::new(); + for &b in string.as_bytes() { + write!(&mut res, "{b:02x}").unwrap(); + } + res +} + fn json_escape(string: &str) -> String { let mut escaped = String::new(); for c in string.chars() { From a36db0b100ea86f306225e02a222aede21f9adf5 Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Tue, 5 Nov 2024 15:51:25 +0100 Subject: [PATCH 3/4] Add section to the book explaining the why and how of hex encoding. --- book/src/interning.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/book/src/interning.md b/book/src/interning.md index 6a45efcf0..af07f392c 100644 --- a/book/src/interning.md +++ b/book/src/interning.md @@ -73,3 +73,19 @@ static SYM: u8 = 0; The next thing to note is that each interned string symbol is one byte in size (because `static SYM` has type `u8`). Thanks to this the addresses of the symbols are consecutive: 0, 1, 2, etc. + +## Encoding + +Storing strings as-is in symbol names can cause compatibility problems, since it can contain any arbitrary character. For example: + +- The `'@'` character can't be used in symbol names because it's reserved for denoting symbol versions. +- The double-quote character `'"'` causes issues with escaping if you use it with `sym` inside an `asm!()` call. + +To deal with this, as of Wire Format Version 5, strings are encoded to bytes as UTF-8, and then the bytes are hex-encoded. +The symbol is prefixed with `__defmt_hex_` to denote it's is hex-encoded, and to allow for future expansion. + + +``` rust +#[export_name = "__defmt_hex_55534220636f6e74726f6c6c6572206973207265616479"] +static SYM: u8 = 0; +``` From 077ccf5a5fe8913c09af468d4b2c01b66737869c Mon Sep 17 00:00:00 2001 From: Dario Nieuwenhuis Date: Tue, 5 Nov 2024 16:05:13 +0100 Subject: [PATCH 4/4] Bump backcompat test revision. --- xtask/src/backcompat.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xtask/src/backcompat.rs b/xtask/src/backcompat.rs index ca9e57819..f12c63931 100644 --- a/xtask/src/backcompat.rs +++ b/xtask/src/backcompat.rs @@ -43,8 +43,8 @@ use crate::{ const DISABLED: bool = false; // use this format: PR - -// PR #747 - Bump wire format -const REVISION_UNDER_TEST: &str = "0e92d3a88aa472377b964979f522829d961d8986"; +// PR #878 - Bump wire format +const REVISION_UNDER_TEST: &str = "a36db0b100ea86f306225e02a222aede21f9adf5"; // the target name is in `firmware/qemu/.cargo/config.toml` but it'd be hard to extract it from that file const RUNNER_ENV_VAR: &str = "CARGO_TARGET_THUMBV7M_NONE_EABI_RUNNER";