From ed1550dc06bfcf2124ea90b98d662365dc03bd6a Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Fri, 15 Aug 2025 19:48:39 +0200 Subject: [PATCH 1/8] apk/res: Add robustness assertions for flags and reserved fields This still parses Android 33's `android.jar` successfully, but paves the way towards detecting new fields and flags that influence how newer resource tables should be parsed, rather than getting stuck reading random data. --- apk/src/compiler/table.rs | 5 +- apk/src/res.rs | 114 ++++++++++++++++++++++++-------------- pri/src/resource_map.rs | 15 ----- 3 files changed, 75 insertions(+), 59 deletions(-) diff --git a/apk/src/compiler/table.rs b/apk/src/compiler/table.rs index 3572e6cf..caad0675 100644 --- a/apk/src/compiler/table.rs +++ b/apk/src/compiler/table.rs @@ -191,6 +191,7 @@ pub struct Table { impl Table { pub fn import_apk(&mut self, apk: &Path) -> Result<()> { + tracing::trace!("Parse `resources.arsc` chunk from `{apk:?}`"); let resources = xcommon::extract_zip_file(apk, "resources.arsc")?; let chunk = Chunk::parse(&mut Cursor::new(resources))?; self.import_chunk(&chunk); @@ -218,7 +219,7 @@ impl Table { } } - fn lookup_package(&self, id: u8) -> Result { + fn lookup_package(&self, id: u8) -> Result> { for package in &self.packages { if let Chunk::TablePackage(header, chunks) = package { if header.id == id as u32 { @@ -229,7 +230,7 @@ impl Table { anyhow::bail!("failed to locate package {}", id); } - pub fn entry_by_ref(&self, r: Ref) -> Result { + pub fn entry_by_ref(&self, r: Ref) -> Result> { let id = self.lookup_package_id(r.package)?; let package = self.lookup_package(id)?; let id = package.lookup_type_id(r.ty)?; diff --git a/apk/src/res.rs b/apk/src/res.rs index 2fbc7757..c4606f20 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -98,6 +98,11 @@ impl ResStringPoolHeader { let string_count = r.read_u32::()?; let style_count = r.read_u32::()?; let flags = r.read_u32::()?; + assert_eq!( + flags & !(Self::SORTED_FLAG | Self::UTF8_FLAG), + 0, 
+ "Unrecognized ResStringPoolHeader flags" + ); let strings_start = r.read_u32::()?; let styles_start = r.read_u32::()?; Ok(Self { @@ -148,12 +153,10 @@ pub struct ResXmlNodeHeader { impl ResXmlNodeHeader { pub fn read(r: &mut impl Read) -> Result { + // TODO: Why is this skipped? let _line_number = r.read_u32::()?; let _comment = r.read_i32::()?; - Ok(Self { - line_number: 1, - comment: -1, - }) + Ok(Self::default()) } pub fn write(&self, w: &mut impl Write) -> Result<()> { @@ -216,21 +219,6 @@ pub struct ResXmlStartElement { pub style_index: u16, } -impl Default for ResXmlStartElement { - fn default() -> Self { - Self { - namespace: -1, - name: -1, - attribute_start: 0x0014, - attribute_size: 0x0014, - attribute_count: 0, - id_index: 0, - class_index: 0, - style_index: 0, - } - } -} - impl ResXmlStartElement { pub fn read(r: &mut impl Read) -> Result { let namespace = r.read_i32::()?; @@ -353,12 +341,6 @@ impl From for u32 { } } -impl std::fmt::Display for ResTableRef { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - #[derive(Clone, Debug, Eq, PartialEq)] pub struct ResTablePackageHeader { /// If this is a base package, its ID. 
Package IDs start @@ -451,7 +433,15 @@ impl ResTableTypeSpecHeader { pub fn read(r: &mut impl Read) -> Result { let id = r.read_u8()?; let res0 = r.read_u8()?; + debug_assert_eq!( + res0, 0, + "ResTableTypeSpecHeader reserved field 0 should be 0" + ); let res1 = r.read_u16::()?; + debug_assert_eq!( + res1, 0, + "ResTableTypeSpecHeader reserved field 1 should be 0" + ); let entry_count = r.read_u32::()?; Ok(Self { id, @@ -489,10 +479,12 @@ pub struct ResTableTypeHeader { } impl ResTableTypeHeader { - pub fn read(r: &mut impl Read) -> Result { + pub fn read(r: &mut (impl Read + Seek)) -> Result { let id = r.read_u8()?; let res0 = r.read_u8()?; + debug_assert_eq!(res0, 0, "ResTableTypeHeader reserved field 0 should be 0"); let res1 = r.read_u16::()?; + debug_assert_eq!(res1, 0, "ResTableTypeHeader reserved field 1 should be 0"); let entry_count = r.read_u32::()?; let entries_start = r.read_u32::()?; let config = ResTableConfig::read(r)?; @@ -506,7 +498,7 @@ impl ResTableTypeHeader { }) } - pub fn write(&self, w: &mut impl Write) -> Result<()> { + pub fn write(&self, w: &mut (impl Write + Seek)) -> Result<()> { w.write_u8(self.id)?; w.write_u8(self.res0)?; w.write_u16::(self.res1)?; @@ -530,7 +522,8 @@ pub struct ResTableConfig { } impl ResTableConfig { - pub fn read(r: &mut impl Read) -> Result { + pub fn read(r: &mut (impl Read + Seek)) -> Result { + let start_pos = r.stream_position()?; let size = r.read_u32::()?; let imsi = r.read_u32::()?; let locale = r.read_u32::()?; @@ -538,7 +531,8 @@ impl ResTableConfig { let input = r.read_u32::()?; let screen_size = r.read_u32::()?; let version = r.read_u32::()?; - let unknown_len = size as usize - 28; + let known_len = r.stream_position()? 
- start_pos; + let unknown_len = size as usize - known_len as usize; let mut unknown = vec![0; unknown_len]; r.read_exact(&mut unknown)?; Ok(Self { @@ -553,7 +547,8 @@ impl ResTableConfig { }) } - pub fn write(&self, w: &mut impl Write) -> Result<()> { + pub fn write(&self, w: &mut (impl Write + Seek)) -> Result<()> { + let start_pos = w.stream_position()?; w.write_u32::(self.size)?; w.write_u32::(self.imsi)?; w.write_u32::(self.locale)?; @@ -562,6 +557,7 @@ impl ResTableConfig { w.write_u32::(self.screen_size)?; w.write_u32::(self.version)?; w.write_all(&self.unknown)?; + debug_assert_eq!(self.size as u64, w.stream_position()? - start_pos); Ok(()) } } @@ -602,19 +598,20 @@ pub struct ResTableEntry { } impl ResTableEntry { - pub fn is_complex(&self) -> bool { - self.flags & 0x1 > 0 - } - - pub fn is_public(&self) -> bool { - self.flags & 0x2 > 0 - } + const FLAG_COMPLEX: u16 = 0x1; + const FLAG_PUBLIC: u16 = 0x2; + const FLAG_WEAK: u16 = 0x4; pub fn read(r: &mut impl Read) -> Result { let size = r.read_u16::()?; let flags = r.read_u16::()?; let key = r.read_u32::()?; - let is_complex = flags & 0x1 > 0; + debug_assert_eq!( + flags & !(Self::FLAG_COMPLEX | Self::FLAG_PUBLIC | Self::FLAG_WEAK), + 0, + "Unrecognized ResTableEntry flags" + ); + let is_complex = flags & Self::FLAG_COMPLEX != 0; if is_complex { debug_assert_eq!(size, 16); } else { @@ -686,6 +683,7 @@ impl ResValue { let size = r.read_u16::()?; debug_assert_eq!(size, 8); let res0 = r.read_u8()?; + debug_assert_eq!(res0, 0, "ResValue reserved field 0 should be 0"); let data_type = r.read_u8()?; let data = r.read_u32::()?; Ok(Self { @@ -851,6 +849,9 @@ impl ResSpan { } } +// TODO: Remove all *Header structures from these elements. This enum is user-facing in a +// high-level data structure, where all byte offsets are irrelevant to the user after parsing, or +// nigh-impossible to guess before writing. 
#[derive(Clone, Debug, Eq, PartialEq)] pub enum Chunk { Null, @@ -873,7 +874,7 @@ impl Chunk { let start_pos = r.stream_position()?; let header = ResChunkHeader::read(r)?; let end_pos = start_pos + header.size as u64; - match ChunkType::from_u16(header.ty) { + let result = match ChunkType::from_u16(header.ty) { Some(ChunkType::Null) => { tracing::trace!("null"); Ok(Chunk::Null) @@ -984,10 +985,21 @@ impl Chunk { Some(ChunkType::XmlStartElement) => { tracing::trace!("xml start element"); let node_header = ResXmlNodeHeader::read(r)?; + let element_pos = r.stream_position()?; let start_element = ResXmlStartElement::read(r)?; let mut attributes = Vec::with_capacity(start_element.attribute_count as usize); + debug_assert_eq!( + element_pos + start_element.attribute_start as u64, + r.stream_position()?, + "TODO: Handle padding between XmlStartElement and attributes" + ); for _ in 0..start_element.attribute_count { + let attr_pos = r.stream_position()?; attributes.push(ResXmlAttribute::read(r)?); + debug_assert_eq!( + attr_pos + start_element.attribute_size as u64, + r.stream_position()? 
+ ); } Ok(Chunk::XmlStartElement( node_header, @@ -1027,11 +1039,21 @@ impl Chunk { let entry = r.read_u32::()?; index.push(entry); } + debug_assert_eq!( + start_pos + type_header.entries_start as u64, + r.stream_position()?, + "TODO: Handle padding between TableType index and entries" + ); let mut entries = Vec::with_capacity(type_header.entry_count as usize); - for offset in &index { - if *offset == 0xffff_ffff { + for &offset in &index { + if offset == 0xffff_ffff { entries.push(None); } else { + debug_assert_eq!( + start_pos + type_header.entries_start as u64 + offset as u64, + r.stream_position()?, + "TODO: Handle non-sequential or padding between entries in TableType" + ); let entry = ResTableEntry::read(r)?; entries.push(Some(entry)); } @@ -1056,7 +1078,15 @@ impl Chunk { None => { anyhow::bail!("unrecognized chunk {:?}", header); } - } + }; + + debug_assert_eq!( + r.stream_position().unwrap(), + end_pos, + "Did not read entire chunk for {header:?}" + ); + + result } pub fn write(&self, w: &mut W) -> Result<()> { diff --git a/pri/src/resource_map.rs b/pri/src/resource_map.rs index a8fb6166..fd5f7abf 100644 --- a/pri/src/resource_map.rs +++ b/pri/src/resource_map.rs @@ -220,18 +220,3 @@ pub enum ResourceValueType { AsciiPath, Utf8Path, } - -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct CandidateSet { - pub resource_map_item: u32, - pub decision_index: u16, - pub candidates: Vec, -} - -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub struct Candidate { - pub qualifier_set: u16, - pub ty: ResourceValueType, - pub data_item_section: u16, - pub data_item_index: u16, -} From 3b414def1c994139351e7135fe956c743b4719eb Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 17 Aug 2025 14:55:42 +0200 Subject: [PATCH 2/8] apk/res: Make type IDs of `0` invalid --- apk/src/compiler/mod.rs | 16 +++++++++------- apk/src/compiler/table.rs | 9 +++++---- apk/src/res.rs | 23 +++++++++++++---------- 3 files changed, 27 insertions(+), 21 deletions(-) diff 
--git a/apk/src/compiler/mod.rs b/apk/src/compiler/mod.rs index 31a5d9ad..203f5ef5 100644 --- a/apk/src/compiler/mod.rs +++ b/apk/src/compiler/mod.rs @@ -1,3 +1,5 @@ +use std::num::NonZeroU8; + use crate::manifest::AndroidManifest; use crate::res::{ Chunk, ResTableConfig, ResTableEntry, ResTableHeader, ResTablePackageHeader, @@ -44,18 +46,18 @@ pub fn compile_mipmap<'a>(package_name: &str, name: &'a str) -> Result(package_name: &str, name: &'a str) -> Result Chunk { +fn mipmap_table_type(type_id: NonZeroU8, density: u16, string_id: u32) -> Chunk { Chunk::TableType( ResTableTypeHeader { id: type_id, diff --git a/apk/src/compiler/table.rs b/apk/src/compiler/table.rs index caad0675..bec336cf 100644 --- a/apk/src/compiler/table.rs +++ b/apk/src/compiler/table.rs @@ -1,6 +1,7 @@ use crate::res::{Chunk, ResAttributeType, ResTableEntry, ResTableRef, ResTableValue, ResValue}; use anyhow::{Context, Result}; use std::io::Cursor; +use std::num::NonZeroU8; use std::path::Path; pub struct Ref<'a> { @@ -70,13 +71,13 @@ impl<'a> Package<'a> { }) } - fn lookup_type_id(&self, name: &str) -> Result { + fn lookup_type_id(&self, name: &str) -> Result { let id = self .types .iter() .position(|s| s.as_str() == name) .with_context(|| format!("failed to locate type id {name}"))?; - Ok(id as u8 + 1) + NonZeroU8::new(id as u8 + 1).context("overflow") } fn lookup_key_id(&self, name: &str) -> Result { @@ -88,7 +89,7 @@ impl<'a> Package<'a> { Ok(id as u32) } - fn lookup_type(&self, id: u8) -> Result> { + fn lookup_type(&self, id: NonZeroU8) -> Result> { for chunk in self.chunks { if let Chunk::TableType(header, _offsets, entries) = chunk { if header.id == id { @@ -106,7 +107,7 @@ impl<'a> Package<'a> { struct Type<'a> { package: u8, - id: u8, + id: NonZeroU8, entries: &'a [Option], } diff --git a/apk/src/res.rs b/apk/src/res.rs index c4606f20..6a319b39 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -1,6 +1,9 @@ -use anyhow::Result; +use anyhow::{Context as _, Result}; use 
byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use std::io::{Read, Seek, SeekFrom, Write}; +use std::{ + io::{Read, Seek, SeekFrom, Write}, + num::NonZeroU8, +}; #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[repr(u16)] @@ -309,9 +312,9 @@ impl ResXmlEndElement { pub struct ResTableRef(u32); impl ResTableRef { - pub fn new(package: u8, ty: u8, entry: u16) -> Self { + pub fn new(package: u8, ty: NonZeroU8, entry: u16) -> Self { let package = (package as u32) << 24; - let ty = (ty as u32) << 16; + let ty = (ty.get() as u32) << 16; let entry = entry as u32; Self(package | ty | entry) } @@ -420,7 +423,7 @@ pub struct ResTableTypeSpecHeader { /// The type identifier this chunk is holding. Type IDs start /// at 1 (corresponding to the value of the type bits in a /// resource identifier). 0 is invalid. - pub id: u8, + pub id: NonZeroU8, /// Must be 0. pub res0: u8, /// Must be 0. @@ -431,7 +434,7 @@ pub struct ResTableTypeSpecHeader { impl ResTableTypeSpecHeader { pub fn read(r: &mut impl Read) -> Result { - let id = r.read_u8()?; + let id = NonZeroU8::new(r.read_u8()?).context("ID of 0 is invalid")?; let res0 = r.read_u8()?; debug_assert_eq!( res0, 0, @@ -452,7 +455,7 @@ impl ResTableTypeSpecHeader { } pub fn write(&self, w: &mut impl Write) -> Result<()> { - w.write_u8(self.id)?; + w.write_u8(self.id.get())?; w.write_u8(self.res0)?; w.write_u16::(self.res1)?; w.write_u32::(self.entry_count)?; @@ -465,7 +468,7 @@ pub struct ResTableTypeHeader { /// The type identifier this chunk is holding. Type IDs start /// at 1 (corresponding to the value of the type bits in a /// resource identifier). 0 is invalid. - pub id: u8, + pub id: NonZeroU8, /// Must be 0. pub res0: u8, /// Must be 0. 
@@ -480,7 +483,7 @@ pub struct ResTableTypeHeader { impl ResTableTypeHeader { pub fn read(r: &mut (impl Read + Seek)) -> Result { - let id = r.read_u8()?; + let id = NonZeroU8::new(r.read_u8()?).context("ID of 0 is invalid")?; let res0 = r.read_u8()?; debug_assert_eq!(res0, 0, "ResTableTypeHeader reserved field 0 should be 0"); let res1 = r.read_u16::()?; @@ -499,7 +502,7 @@ impl ResTableTypeHeader { } pub fn write(&self, w: &mut (impl Write + Seek)) -> Result<()> { - w.write_u8(self.id)?; + w.write_u8(self.id.get())?; w.write_u8(self.res0)?; w.write_u16::(self.res1)?; w.write_u32::(self.entry_count)?; From 66320a39dd0839f3c976921d9b8662c2b2034000 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 17 Aug 2025 15:45:03 +0200 Subject: [PATCH 3/8] apk/res: Take offset and `entries_start` into account when parsing `Type` chunks The original code was assuming that all non-empty `Type` entries were stored sequentially, directly after the index list and tightly packed together with no padding in between. Previously added assertions support this case, but this will no longer be fully correct with the introduction of a sparse `Type` in Android 34, and 16-bit offsets in Android 35. Note that a future patch should be removing the unusable index array that is exposed to the user: this byte-offset is no longer relevant to them when the elements have been parsed, and is terribly hard and error-prone to set up manually before serializing. This however becomes tedious to deal with in our writer. 
--- apk/src/res.rs | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/apk/src/res.rs b/apk/src/res.rs index 6a319b39..6eccdc5f 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -1037,26 +1037,19 @@ impl Chunk { Some(ChunkType::TableType) => { tracing::trace!("table type"); let type_header = ResTableTypeHeader::read(r)?; - let mut index = Vec::with_capacity(type_header.entry_count as usize); + let mut entries = Vec::with_capacity(type_header.entry_count as usize); + let mut index = Vec::with_capacity(type_header.entry_count as usize); // TODO: Removing this bogus mapping from the API requires rewriting the write() implementation for _ in 0..type_header.entry_count { - let entry = r.read_u32::()?; - index.push(entry); + let offset = r.read_u32::()?; + index.push(offset); } - debug_assert_eq!( - start_pos + type_header.entries_start as u64, - r.stream_position()?, - "TODO: Handle padding between TableType index and entries" - ); - let mut entries = Vec::with_capacity(type_header.entry_count as usize); for &offset in &index { if offset == 0xffff_ffff { entries.push(None); } else { - debug_assert_eq!( + r.seek(SeekFrom::Start( start_pos + type_header.entries_start as u64 + offset as u64, - r.stream_position()?, - "TODO: Handle non-sequential or padding between entries in TableType" - ); + ))?; let entry = ResTableEntry::read(r)?; entries.push(Some(entry)); } From 963b3197ebeeecc044628ab9a8ef3d487ada40ad Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 17 Aug 2025 15:32:28 +0200 Subject: [PATCH 4/8] apk/res: Handle `Type` and `TypeSpec` headers in the parser and writer The header for `TypeSpec` and `Type`, as well as the "index" mapping contain byte offsets, reserved fields and flags that are only usable for the binary parser and writer. 
Even though handling these is cumbersome at writing time (because of jumping back to the header or offsets array to write accurate byte positions after first serializing prior elements), that's still a lot more correct than leaving the user to **guess** what byte offsets to put into the vector/headers. That is otherwise going to be even more complicated with the introduction of a new `SPARSE` `Type` flag in Android 34, and `OFFSET16` `Type` flag in Android 35. --- apk/src/compiler/mod.rs | 53 ++++++++------------ apk/src/compiler/table.rs | 7 ++- apk/src/res.rs | 100 ++++++++++++++++++++++++++++++-------- 3 files changed, 104 insertions(+), 56 deletions(-) diff --git a/apk/src/compiler/mod.rs b/apk/src/compiler/mod.rs index 203f5ef5..f69c605f 100644 --- a/apk/src/compiler/mod.rs +++ b/apk/src/compiler/mod.rs @@ -2,8 +2,8 @@ use std::num::NonZeroU8; use crate::manifest::AndroidManifest; use crate::res::{ - Chunk, ResTableConfig, ResTableEntry, ResTableHeader, ResTablePackageHeader, - ResTableTypeHeader, ResTableTypeSpecHeader, ResTableValue, ResValue, ScreenType, + Chunk, ResTableConfig, ResTableEntry, ResTableHeader, ResTablePackageHeader, ResTableValue, + ResValue, ScreenType, }; use anyhow::Result; @@ -44,15 +44,7 @@ pub fn compile_mipmap<'a>(package_name: &str, name: &'a str) -> Result(package_name: &str, name: &'a str) -> Result Chunk { - Chunk::TableType( - ResTableTypeHeader { - id: type_id, - res0: 0, - res1: 0, - entry_count: 1, - entries_start: 88, - config: ResTableConfig { - size: 28 + 36, - imsi: 0, - locale: 0, - screen_type: ScreenType { - orientation: 0, - touchscreen: 0, - density, - }, - input: 0, - screen_size: 0, - version: 4, - unknown: vec![0; 36], + Chunk::TableType { + type_id, + config: ResTableConfig { + size: 28 + 36, + imsi: 0, + locale: 0, + screen_type: ScreenType { + orientation: 0, + touchscreen: 0, + density, }, + input: 0, + screen_size: 0, + version: 4, + unknown: vec![0; 36], }, - vec![0], - vec![Some(ResTableEntry { + 
entries: vec![Some(ResTableEntry { size: 8, flags: 0, key: 0, @@ -100,7 +85,7 @@ fn mipmap_table_type(type_id: NonZeroU8, density: u16, string_id: u32) -> Chunk data: string_id, }), })], - ) + } } pub struct Mipmap<'a> { diff --git a/apk/src/compiler/table.rs b/apk/src/compiler/table.rs index bec336cf..32b3f63f 100644 --- a/apk/src/compiler/table.rs +++ b/apk/src/compiler/table.rs @@ -91,8 +91,11 @@ impl<'a> Package<'a> { fn lookup_type(&self, id: NonZeroU8) -> Result> { for chunk in self.chunks { - if let Chunk::TableType(header, _offsets, entries) = chunk { - if header.id == id { + if let Chunk::TableType { + type_id, entries, .. + } = chunk + { + if *type_id == id { return Ok(Type { package: self.id, id, diff --git a/apk/src/res.rs b/apk/src/res.rs index 6eccdc5f..1934e85d 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -859,16 +859,23 @@ impl ResSpan { pub enum Chunk { Null, StringPool(Vec, Vec>), + // TODO: Remove this header; the number of packages is implied by te number of Chunk::TablePackage elements. Table(ResTableHeader, Vec), Xml(Vec), XmlStartNamespace(ResXmlNodeHeader, ResXmlNamespace), XmlEndNamespace(ResXmlNodeHeader, ResXmlNamespace), + // TODO: Replace ResXmlStartElement, which contains byte offsets. XmlStartElement(ResXmlNodeHeader, ResXmlStartElement, Vec), XmlEndElement(ResXmlNodeHeader, ResXmlEndElement), XmlResourceMap(Vec), + // TODO: Remove this header, it seems to contain fields that are specifically for (de)serialization. 
TablePackage(ResTablePackageHeader, Vec), - TableType(ResTableTypeHeader, Vec, Vec>), - TableTypeSpec(ResTableTypeSpecHeader, Vec), + TableType { + type_id: NonZeroU8, + config: ResTableConfig, + entries: Vec>, + }, + TableTypeSpec(NonZeroU8, Vec), Unknown, } @@ -1037,13 +1044,16 @@ impl Chunk { Some(ChunkType::TableType) => { tracing::trace!("table type"); let type_header = ResTableTypeHeader::read(r)?; - let mut entries = Vec::with_capacity(type_header.entry_count as usize); - let mut index = Vec::with_capacity(type_header.entry_count as usize); // TODO: Removing this bogus mapping from the API requires rewriting the write() implementation + + // Parse all entry offsets at once so that we don't repeatedly have to seek back. + let mut entry_offsets = Vec::with_capacity(type_header.entry_count as usize); for _ in 0..type_header.entry_count { let offset = r.read_u32::()?; - index.push(offset); + entry_offsets.push(offset); } - for &offset in &index { + + let mut entries = Vec::with_capacity(type_header.entry_count as usize); + for offset in entry_offsets { if offset == 0xffff_ffff { entries.push(None); } else { @@ -1054,7 +1064,12 @@ impl Chunk { entries.push(Some(entry)); } } - Ok(Chunk::TableType(type_header, index, entries)) + + Ok(Chunk::TableType { + type_id: type_header.id, + config: type_header.config, + entries, + }) } Some(ChunkType::TableTypeSpec) => { tracing::trace!("table type spec"); @@ -1063,7 +1078,7 @@ impl Chunk { for c in type_spec.iter_mut() { *c = r.read_u32::()?; } - Ok(Chunk::TableTypeSpec(type_spec_header, type_spec)) + Ok(Chunk::TableTypeSpec(type_spec_header.id, type_spec)) } Some(ChunkType::Unknown) => { tracing::trace!("unknown"); @@ -1107,7 +1122,7 @@ impl Chunk { Ok(()) } - fn end_chunk(self, w: &mut W) -> Result<(u64, u64)> { + fn end_chunk(self, w: &mut W) -> Result<(u64, u64, u64)> { assert_ne!(self.end_header, 0); let end_chunk = w.stream_position()?; let header = ResChunkHeader { @@ -1118,7 +1133,7 @@ impl Chunk { 
w.seek(SeekFrom::Start(self.start_chunk))?; header.write(w)?; w.seek(SeekFrom::Start(end_chunk))?; - Ok((self.start_chunk, end_chunk)) + Ok((self.start_chunk, self.end_header, end_chunk)) } } match self { @@ -1153,7 +1168,7 @@ impl Chunk { } w.write_i32::(-1)?; } - let (start_chunk, end_chunk) = chunk.end_chunk(w)?; + let (start_chunk, _end_header, end_chunk) = chunk.end_chunk(w)?; w.seek(SeekFrom::Start(start_chunk + 8))?; ResStringPoolHeader { @@ -1251,24 +1266,69 @@ impl Chunk { package_header.write(w)?; w.seek(SeekFrom::Start(end))?; } - Chunk::TableType(type_header, index, entries) => { + Chunk::TableType { + type_id, + config, + entries, + } => { let mut chunk = ChunkWriter::start_chunk(ChunkType::TableType, w)?; + let start_type_header = w.stream_position()?; + let mut type_header = ResTableTypeHeader { + id: *type_id, + res0: 0, + res1: 0, + entry_count: entries.len() as u32, + entries_start: 0, // Will be overwritten later + config: config.clone(), + }; type_header.write(w)?; chunk.end_header(w)?; - for offset in index { - w.write_u32::(*offset)?; + + // Reserve space for index table + for _ in entries { + w.write_u32::(0)?; } - for entry in entries.iter().flatten() { - entry.write(w)?; + + let entries_pos = w.stream_position()?; + // Offset from the beginning of the chunk to the first entry: + let entries_start = entries_pos - chunk.start_chunk; + + // Write out all entries + for (i, entry) in entries.iter().enumerate() { + let mut offset = 0xffff_ffff; + if let Some(entry) = entry { + offset = (w.stream_position()? 
- entries_pos) as u32; + entry.write(w)?; + } + let pos = w.stream_position()?; + w.seek(SeekFrom::Start( + chunk.end_header + (size_of::() * i) as u64, + ))?; + w.write_u32::(offset)?; + w.seek(SeekFrom::Start(pos))?; } - chunk.end_chunk(w)?; + + let (_, end_header, end_chunk) = chunk.end_chunk(w)?; + + // Update entries_start and rewrite the whole header with it: + w.seek(SeekFrom::Start(start_type_header))?; + type_header.entries_start = entries_start as u32; + type_header.write(w)?; + debug_assert_eq!(w.stream_position()?, end_header); + w.seek(SeekFrom::Start(end_chunk))?; } - Chunk::TableTypeSpec(type_spec_header, type_spec) => { + Chunk::TableTypeSpec(type_id, type_spec) => { let mut chunk = ChunkWriter::start_chunk(ChunkType::TableTypeSpec, w)?; + let type_spec_header = ResTableTypeSpecHeader { + id: *type_id, + res0: 0, + res1: 0, + entry_count: type_spec.len() as u32, + }; type_spec_header.write(w)?; chunk.end_header(w)?; - for spec in type_spec { - w.write_u32::(*spec)?; + for &spec in type_spec { + w.write_u32::(spec)?; } chunk.end_chunk(w)?; } From ffd33851a82123f4b13b6719fe06038770f332e1 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 17 Aug 2025 01:07:52 +0200 Subject: [PATCH 5/8] apk/res: Parse new sparse `Type` chunk since Android 34 Android 34 replaced most `Type` chunks with few entries with a new `SPARSE` list. Here the offset table is now a combination of a `u16` array index and a `u16` byte offset, allowing it to take up way less space in binary form when few elements are set while still reaching high(er) indexes (typically one or a handful of elements with an index in the hundreds or thousands). 
--- apk/src/res.rs | 80 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/apk/src/res.rs b/apk/src/res.rs index 1934e85d..cb2e3f35 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -469,8 +469,8 @@ pub struct ResTableTypeHeader { /// at 1 (corresponding to the value of the type bits in a /// resource identifier). 0 is invalid. pub id: NonZeroU8, - /// Must be 0. - pub res0: u8, + /// Flags. + pub flags: u8, /// Must be 0. pub res1: u16, /// Number of u32 entry indices that follow. @@ -482,10 +482,25 @@ pub struct ResTableTypeHeader { } impl ResTableTypeHeader { + const NO_ENTRY: u32 = 0xffff_ffff; + const fn offset_from16(offset: u16) -> u32 { + if offset == 0xffff { + Self::NO_ENTRY + } else { + offset as u32 * 4 + } + } + + const FLAG_SPARSE: u8 = 1 << 0; + pub fn read(r: &mut (impl Read + Seek)) -> Result { let id = NonZeroU8::new(r.read_u8()?).context("ID of 0 is invalid")?; - let res0 = r.read_u8()?; - debug_assert_eq!(res0, 0, "ResTableTypeHeader reserved field 0 should be 0"); + let flags = r.read_u8()?; + debug_assert_eq!( + flags & !Self::FLAG_SPARSE, + 0, + "Unrecognized ResTableTypeHeader flags" + ); let res1 = r.read_u16::()?; debug_assert_eq!(res1, 0, "ResTableTypeHeader reserved field 1 should be 0"); let entry_count = r.read_u32::()?; @@ -493,7 +508,7 @@ impl ResTableTypeHeader { let config = ResTableConfig::read(r)?; Ok(Self { id, - res0, + flags, res1, entry_count, entries_start, @@ -501,9 +516,13 @@ impl ResTableTypeHeader { }) } + pub fn is_sparse(&self) -> bool { + self.flags & Self::FLAG_SPARSE != 0 + } + pub fn write(&self, w: &mut (impl Write + Seek)) -> Result<()> { w.write_u8(self.id.get())?; - w.write_u8(self.res0)?; + w.write_u8(self.flags)?; w.write_u16::(self.res1)?; w.write_u32::(self.entry_count)?; w.write_u32::(self.entries_start)?; @@ -1047,22 +1066,43 @@ impl Chunk { // Parse all entry offsets at once so that we don't repeatedly have to seek back. 
let mut entry_offsets = Vec::with_capacity(type_header.entry_count as usize); + let mut high_idx = type_header.entry_count as u16; for _ in 0..type_header.entry_count { - let offset = r.read_u32::()?; - entry_offsets.push(offset); + entry_offsets.push(if type_header.is_sparse() { + let idx = r.read_u16::()?; + high_idx = high_idx.max(idx + 1); + let offset = + ResTableTypeHeader::offset_from16(r.read_u16::()?); + (offset, Some(idx)) + } else { + let offset = r.read_u32::()?; + (offset, None) + }); } - let mut entries = Vec::with_capacity(type_header.entry_count as usize); - for offset in entry_offsets { - if offset == 0xffff_ffff { - entries.push(None); - } else { - r.seek(SeekFrom::Start( - start_pos + type_header.entries_start as u64 + offset as u64, - ))?; - let entry = ResTableEntry::read(r)?; - entries.push(Some(entry)); + // The current scheme of allocating a large vector with mostly None's for sparse data + // may result in high peak memory usage. Since by far most tables in android.jar are + // sparse, we should switch to a HashMap/BTreeMap. + if type_header.is_sparse() { + tracing::trace!( + "Sparse table is occupying {} out of {} `Vec` elements", + type_header.entry_count, + high_idx + ); + } + + let mut entries = vec![None; high_idx as usize]; + for (i, &(offset, idx)) in entry_offsets.iter().enumerate() { + if offset == ResTableTypeHeader::NO_ENTRY { + continue; } + + r.seek(SeekFrom::Start( + start_pos + type_header.entries_start as u64 + offset as u64, + ))?; + let entry = ResTableEntry::read(r)?; + + entries[idx.map_or(i, |idx| idx as usize)] = Some(entry); } Ok(Chunk::TableType { @@ -1275,7 +1315,7 @@ impl Chunk { let start_type_header = w.stream_position()?; let mut type_header = ResTableTypeHeader { id: *type_id, - res0: 0, + flags: 0, // TODO: Enable SPARSE flag if there are lots of empty elements. 
res1: 0, entry_count: entries.len() as u32, entries_start: 0, // Will be overwritten later @@ -1295,7 +1335,7 @@ impl Chunk { // Write out all entries for (i, entry) in entries.iter().enumerate() { - let mut offset = 0xffff_ffff; + let mut offset = ResTableTypeHeader::NO_ENTRY; if let Some(entry) = entry { offset = (w.stream_position()? - entries_pos) as u32; entry.write(w)?; From e4c1e7263de76db63c233e1b680e3a7569f03e8d Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Fri, 15 Aug 2025 19:48:39 +0200 Subject: [PATCH 6/8] apk/res: Parse new `typesCount` in `TypeSpec` since Android 35 Reserved field 1 is now used to store the number of types, though it has no impact on what we need to parse. --- apk/src/res.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/apk/src/res.rs b/apk/src/res.rs index cb2e3f35..8bf244f0 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -426,8 +426,8 @@ pub struct ResTableTypeSpecHeader { pub id: NonZeroU8, /// Must be 0. pub res0: u8, - /// Must be 0. - pub res1: u16, + /// Used to be reserved, if >0 specifies the number of `ResTable_type` entries for this spec. + pub types_count: u16, /// Number of u32 entry configuration masks that follow. 
pub entry_count: u32, } @@ -440,16 +440,12 @@ impl ResTableTypeSpecHeader { res0, 0, "ResTableTypeSpecHeader reserved field 0 should be 0" ); - let res1 = r.read_u16::()?; - debug_assert_eq!( - res1, 0, - "ResTableTypeSpecHeader reserved field 1 should be 0" - ); + let types_count = r.read_u16::()?; let entry_count = r.read_u32::()?; Ok(Self { id, res0, - res1, + types_count, entry_count, }) } @@ -457,7 +453,7 @@ impl ResTableTypeSpecHeader { pub fn write(&self, w: &mut impl Write) -> Result<()> { w.write_u8(self.id.get())?; w.write_u8(self.res0)?; - w.write_u16::(self.res1)?; + w.write_u16::(self.types_count)?; w.write_u32::(self.entry_count)?; Ok(()) } @@ -1362,7 +1358,7 @@ impl Chunk { let type_spec_header = ResTableTypeSpecHeader { id: *type_id, res0: 0, - res1: 0, + types_count: 0, entry_count: type_spec.len() as u32, }; type_spec_header.write(w)?; From c75dbbaf3f8d6d31a5878907026e2ee58404d528 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 17 Aug 2025 17:32:52 +0200 Subject: [PATCH 7/8] apk/res: Parse new 16-bit offset `Type` chunk since Android 35 Probably after using only 16 bits for the offset behind the `SPARSE` flag, it was realized that 16-bit offsets could also reduce table size when applied to non-sparse `Type` lists. 
--- apk/src/res.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/apk/src/res.rs b/apk/src/res.rs index 8bf244f0..5cf0f829 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -488,12 +488,13 @@ impl ResTableTypeHeader { } const FLAG_SPARSE: u8 = 1 << 0; + const FLAG_OFFSET16: u8 = 1 << 1; pub fn read(r: &mut (impl Read + Seek)) -> Result { let id = NonZeroU8::new(r.read_u8()?).context("ID of 0 is invalid")?; let flags = r.read_u8()?; debug_assert_eq!( - flags & !Self::FLAG_SPARSE, + flags & !(Self::FLAG_SPARSE | Self::FLAG_OFFSET16), 0, "Unrecognized ResTableTypeHeader flags" ); @@ -516,9 +517,18 @@ impl ResTableTypeHeader { self.flags & Self::FLAG_SPARSE != 0 } + pub fn is_offset16(&self) -> bool { + self.flags & Self::FLAG_OFFSET16 != 0 + } + pub fn write(&self, w: &mut (impl Write + Seek)) -> Result<()> { w.write_u8(self.id.get())?; w.write_u8(self.flags)?; + debug_assert_eq!( + self.flags & Self::FLAG_OFFSET16, + 0, + "Writing OFFSET16 ResTableTypeHeader is not yet implemented" + ); w.write_u16::(self.res1)?; w.write_u32::(self.entry_count)?; w.write_u32::(self.entries_start)?; @@ -1070,6 +1080,10 @@ impl Chunk { let offset = ResTableTypeHeader::offset_from16(r.read_u16::()?); (offset, Some(idx)) + } else if type_header.is_offset16() { + let offset = + ResTableTypeHeader::offset_from16(r.read_u16::()?); + (offset, None) } else { let offset = r.read_u32::()?; (offset, None) From 4f710e8947fd66182c64258a500c2578e8a96af2 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 17 Aug 2025 17:47:20 +0200 Subject: [PATCH 8/8] apk/res: Parse new `COMPACT` `Type` `Entry` elements since Android 35 Android 35 seemed to be on a quest to reduce resource table sizes by using smaller types for offsets, building on Android-34's sparse `Type` lists and now also using less bytes for compact entries where the value is only 4 bytes. 
--- apk/src/res.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/apk/src/res.rs b/apk/src/res.rs index 5cf0f829..7964edfc 100644 --- a/apk/src/res.rs +++ b/apk/src/res.rs @@ -629,11 +629,33 @@ impl ResTableEntry { const FLAG_COMPLEX: u16 = 0x1; const FLAG_PUBLIC: u16 = 0x2; const FLAG_WEAK: u16 = 0x4; + const FLAG_COMPACT: u16 = 0x8; pub fn read(r: &mut impl Read) -> Result { let size = r.read_u16::()?; let flags = r.read_u16::()?; let key = r.read_u32::()?; + if flags & Self::FLAG_COMPACT != 0 { + // Upper 8 bits are dataType, lower 8 bits remain the flags that we already know about: + let data_type = flags >> 8; + let flags = flags & 0xff; + debug_assert_eq!( + flags & !(Self::FLAG_COMPACT | Self::FLAG_PUBLIC | Self::FLAG_WEAK), + 0, + "Unrecognized COMPACT ResTableEntry flags" + ); + + // If compact, the first u16 (size) is the key and the last u32 is the data: + let data = key; + let key = size as u32; + + return Ok(Self { + size: 8, + flags, + key, + value: ResTableValue::Compact(data_type as u8, data), + }); + } debug_assert_eq!( flags & !(Self::FLAG_COMPLEX | Self::FLAG_PUBLIC | Self::FLAG_WEAK), 0, @@ -656,6 +678,14 @@ impl ResTableEntry { pub fn write(&self, w: &mut impl Write) -> Result<()> { w.write_u16::(self.size)?; + // TODO: The user likely shouldn't be able to create ResTableMapEntry structures themselves, + // but instead rely on the serializer to wrap their ResTableValue variants in the + // corresponding structure. + debug_assert_eq!( + self.flags & Self::FLAG_COMPACT, + 0, + "Writing COMPACT ResTableEntry is not yet implemented" + ); w.write_u16::(self.flags)?; w.write_u32::(self.key)?; self.value.write(w)?; @@ -667,6 +697,7 @@ impl ResTableEntry { pub enum ResTableValue { Simple(ResValue), Complex(ResTableMapEntry, Vec), + Compact(u8, u32), } impl ResTableValue { @@ -693,6 +724,7 @@ impl ResTableValue { entry.write(w)?; } } + Self::Compact(_data_type, _data) => todo!(), } Ok(()) }