Skip to content

Commit 1bb4578

Browse files
committed
review changes
1 parent 79bb79c commit 1bb4578

File tree

4 files changed

+387
-391
lines changed

4 files changed

+387
-391
lines changed

rtmp/src/flv/ex_video.rs

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
use bytes::Bytes;
2+
3+
use crate::{RtmpMessageSerializeError, error::FlvVideoTagParseError};
4+
5+
use super::mod_ex::resolve_mod_ex;
6+
use super::video::{VideoTagFrameType, parse_composition_time};
7+
8+
/// Parsed Enhanced RTMP video tag.
9+
#[derive(Debug, Clone)]
10+
pub enum ExVideoTag {
11+
StartSeek,
12+
EndSeek,
13+
VideoBody {
14+
four_cc: ExVideoFourCc,
15+
packet: ExVideoPacket,
16+
frame_type: VideoTagFrameType,
17+
timestamp_offset_nanos: Option<u32>,
18+
},
19+
}
20+
21+
impl ExVideoTag {
22+
pub fn frame_type(&self) -> VideoTagFrameType {
23+
match self {
24+
ExVideoTag::StartSeek | ExVideoTag::EndSeek => {
25+
VideoTagFrameType::VideoInfoOrCommandFrame
26+
}
27+
ExVideoTag::VideoBody { frame_type, .. } => *frame_type,
28+
}
29+
}
30+
}
31+
32+
/// FourCC video codec identifiers for Enhanced RTMP.
///
/// Each variant corresponds to the four ASCII bytes shown in its
/// documentation. The type is a plain field-less enum, so it is `Copy` and
/// implements `Eq`/`Hash`, which lets it be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExVideoFourCc {
    /// VP8 (`vp08`)
    Vp08,
    /// VP9 (`vp09`)
    Vp09,
    /// AV1 (`av01`)
    Av01,
    /// H.264/AVC (`avc1`)
    Avc1,
    /// H.265/HEVC (`hvc1`)
    Hvc1,
}
46+
47+
impl ExVideoFourCc {
48+
fn from_raw(bytes: [u8; 4]) -> Result<Self, FlvVideoTagParseError> {
49+
match &bytes {
50+
b"vp08" => Ok(Self::Vp08),
51+
b"vp09" => Ok(Self::Vp09),
52+
b"av01" => Ok(Self::Av01),
53+
b"avc1" => Ok(Self::Avc1),
54+
b"hvc1" => Ok(Self::Hvc1),
55+
_ => Err(FlvVideoTagParseError::UnknownVideoFourCc(bytes)),
56+
}
57+
}
58+
59+
#[allow(unused)]
60+
fn to_raw(self) -> [u8; 4] {
61+
match self {
62+
Self::Vp08 => *b"vp08",
63+
Self::Vp09 => *b"vp09",
64+
Self::Av01 => *b"av01",
65+
Self::Avc1 => *b"avc1",
66+
Self::Hvc1 => *b"hvc1",
67+
}
68+
}
69+
70+
/// Returns true if this codec uses SI24 CompositionTime in CodedFrames.
71+
/// Per the spec, only AVC and HEVC carry composition time offset.
72+
fn has_composition_time(self) -> bool {
73+
matches!(self, Self::Avc1 | Self::Hvc1)
74+
}
75+
}
76+
77+
/// Semantic video packet type after parsing.
78+
///
79+
/// This represents the resolved body content. Wire-only signals (`ModEx`, `Multitrack`)
80+
/// are handled during parsing and do not appear here. `CodedFrames` and `CodedFramesX`
81+
/// from the wire are unified — `CodedFramesX` sets `composition_time = 0`.
82+
#[derive(Debug, Clone)]
83+
pub enum ExVideoPacket {
84+
/// Decoder configuration record (SPS/PPS, VPS, etc.)
85+
SequenceStart(Bytes),
86+
/// Video frame data with composition time offset.
87+
/// For codecs without composition time (VP8, VP9, AV1), `composition_time` is 0.
88+
/// Encompasses both wire types `CodedFrames` (explicit SI24) and `CodedFramesX` (implicit 0).
89+
CodedFrames { composition_time: i32, data: Bytes },
90+
/// End of sequence marker. No payload.
91+
SequenceEnd,
92+
/// AMF-encoded metadata (e.g. HDR colorInfo).
93+
Metadata(Bytes),
94+
/// Carriage of bitstream in MPEG-2 TS format.
95+
/// Mutually exclusive with SequenceStart.
96+
Mpeg2TsSequenceStart(Bytes),
97+
}
98+
99+
#[derive(Debug, Clone, Copy, PartialEq)]
100+
pub(super) enum ExVideoPacketType {
101+
SequenceStart,
102+
CodedFrames,
103+
SequenceEnd,
104+
CodedFramesX,
105+
Metadata,
106+
Mpeg2TsSequenceStart,
107+
Multitrack,
108+
ModEx,
109+
}
110+
111+
impl ExVideoPacketType {
112+
pub(super) fn from_raw(value: u8) -> Result<Self, FlvVideoTagParseError> {
113+
match value {
114+
0 => Ok(Self::SequenceStart),
115+
1 => Ok(Self::CodedFrames),
116+
2 => Ok(Self::SequenceEnd),
117+
3 => Ok(Self::CodedFramesX),
118+
4 => Ok(Self::Metadata),
119+
5 => Ok(Self::Mpeg2TsSequenceStart),
120+
6 => Ok(Self::Multitrack),
121+
7 => Ok(Self::ModEx),
122+
_ => Err(FlvVideoTagParseError::UnknownExVideoPacketType(value)),
123+
}
124+
}
125+
126+
fn into_raw(self) -> u8 {
127+
match self {
128+
Self::SequenceStart => 0,
129+
Self::CodedFrames => 1,
130+
Self::SequenceEnd => 2,
131+
Self::CodedFramesX => 3,
132+
Self::Metadata => 4,
133+
Self::Mpeg2TsSequenceStart => 5,
134+
Self::Multitrack => 6,
135+
Self::ModEx => 7,
136+
}
137+
}
138+
}
139+
140+
impl ExVideoTag {
141+
/// Parses Enhanced RTMP video tag.
142+
/// First byte: `[isExHeader(1) | VideoFrameType(3 bits) | VideoPacketType(4 bits)]`
143+
pub fn parse(data: Bytes) -> Result<Self, FlvVideoTagParseError> {
144+
if data.is_empty() {
145+
return Err(FlvVideoTagParseError::TooShort);
146+
}
147+
148+
let frame_type = VideoTagFrameType::from_raw((data[0] & 0b01110000) >> 4)?;
149+
let packet_type = ExVideoPacketType::from_raw(data[0] & 0b00001111)?;
150+
151+
// Process ModEx to resolve the final packet type and collect modifiers.
152+
let (packet_type, rest, timestamp_offset_nanos) = if packet_type == ExVideoPacketType::ModEx
153+
{
154+
let result = resolve_mod_ex(data.slice(1..))?;
155+
(
156+
result.packet_type,
157+
result.remaining,
158+
result.timestamp_offset_nanos,
159+
)
160+
} else {
161+
(packet_type, data.slice(1..), None)
162+
};
163+
164+
// Per spec: if frame_type is Command and packet_type is not Metadata,
165+
// the payload is a single UI8 VideoCommand with no FourCC or video body.
166+
if frame_type == VideoTagFrameType::VideoInfoOrCommandFrame
167+
&& packet_type != ExVideoPacketType::Metadata
168+
{
169+
if rest.is_empty() {
170+
return Err(FlvVideoTagParseError::TooShort);
171+
}
172+
let content = match rest[0] {
173+
0 => ExVideoTag::StartSeek,
174+
1 => ExVideoTag::EndSeek,
175+
v => return Err(FlvVideoTagParseError::UnknownVideoCommand(v)),
176+
};
177+
return Ok(content);
178+
}
179+
180+
// Read FourCC (4 bytes), present for all non-command packet types.
181+
if rest.len() < 4 {
182+
return Err(FlvVideoTagParseError::TooShort);
183+
}
184+
let four_cc = ExVideoFourCc::from_raw([rest[0], rest[1], rest[2], rest[3]])?;
185+
let body_data = rest.slice(4..);
186+
187+
let packet = match packet_type {
188+
ExVideoPacketType::SequenceStart => ExVideoPacket::SequenceStart(body_data),
189+
ExVideoPacketType::CodedFrames => Self::parse_coded_frames(body_data, four_cc)?,
190+
ExVideoPacketType::CodedFramesX => ExVideoPacket::CodedFrames {
191+
composition_time: 0,
192+
data: body_data,
193+
},
194+
ExVideoPacketType::SequenceEnd => ExVideoPacket::SequenceEnd,
195+
ExVideoPacketType::Metadata => ExVideoPacket::Metadata(body_data),
196+
ExVideoPacketType::Mpeg2TsSequenceStart => {
197+
ExVideoPacket::Mpeg2TsSequenceStart(body_data)
198+
}
199+
ExVideoPacketType::Multitrack => {
200+
// TODO: implement multitrack parsing (AvMultitrackType + per-track FourCC/data)
201+
return Err(FlvVideoTagParseError::UnsupportedPacketType(
202+
packet_type.into_raw(),
203+
));
204+
}
205+
ExVideoPacketType::ModEx => {
206+
unreachable!("ModEx should have been resolved above")
207+
}
208+
};
209+
210+
Ok(ExVideoTag::VideoBody {
211+
four_cc,
212+
packet,
213+
frame_type,
214+
timestamp_offset_nanos,
215+
})
216+
}
217+
218+
/// Parses CodedFrames body. AVC and HEVC include an SI24 composition
219+
/// time prefix; other codecs do not (composition_time is set to 0
220+
/// in the parsed representation).
221+
fn parse_coded_frames(
222+
data: Bytes,
223+
four_cc: ExVideoFourCc,
224+
) -> Result<ExVideoPacket, FlvVideoTagParseError> {
225+
if four_cc.has_composition_time() {
226+
if data.len() < 3 {
227+
return Err(FlvVideoTagParseError::TooShort);
228+
}
229+
let composition_time = parse_composition_time(&data[0..3]);
230+
Ok(ExVideoPacket::CodedFrames {
231+
composition_time,
232+
data: data.slice(3..),
233+
})
234+
} else {
235+
Ok(ExVideoPacket::CodedFrames {
236+
composition_time: 0,
237+
data,
238+
})
239+
}
240+
}
241+
242+
pub fn serialize(&self) -> Result<Bytes, RtmpMessageSerializeError> {
243+
unimplemented!()
244+
}
245+
}

rtmp/src/flv/mod.rs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,46 @@
11
mod audio;
2+
mod ex_video;
3+
mod mod_ex;
24
mod video;
3-
mod video_enhanced;
45

56
pub use audio::*;
7+
use bytes::Bytes;
8+
pub use ex_video::*;
69
pub use video::*;
7-
pub use video_enhanced::*;
10+
11+
use crate::{FlvVideoTagParseError, RtmpMessageSerializeError};
12+
13+
/// Mask for the IsExHeader flag: the most significant bit of the first `VIDEODATA` byte.
/// When the bit is set, the payload is parsed as an Enhanced RTMP video tag.
const EX_HEADER_BIT: u8 = 0b10000000;
14+
15+
/// Top-level FLV video data, supporting both legacy and Enhanced RTMP formats.
16+
///
17+
/// Legacy format: <https://veovera.org/docs/legacy/video-file-format-v10-1-spec.pdf#page=74>
18+
/// Enhanced RTMP: <https://veovera.org/docs/enhanced/enhanced-rtmp-v2.pdf>
19+
#[derive(Debug, Clone)]
20+
pub enum FlvVideoData {
21+
Legacy(VideoTag),
22+
Enhanced(ExVideoTag),
23+
}
24+
25+
impl FlvVideoData {
26+
/// Parses flv `VIDEODATA`. Checks the IsExHeader bit in the first byte
27+
/// and dispatches to either legacy or Enhanced RTMP parsing.
28+
pub fn parse(data: Bytes) -> Result<Self, FlvVideoTagParseError> {
29+
if data.is_empty() {
30+
return Err(FlvVideoTagParseError::TooShort);
31+
}
32+
33+
if data[0] & EX_HEADER_BIT != 0 {
34+
ExVideoTag::parse(data).map(FlvVideoData::Enhanced)
35+
} else {
36+
VideoTag::parse(data).map(FlvVideoData::Legacy)
37+
}
38+
}
39+
40+
pub fn serialize(&self) -> Result<Bytes, RtmpMessageSerializeError> {
41+
match self {
42+
FlvVideoData::Legacy(tag) => tag.serialize(),
43+
FlvVideoData::Enhanced(tag) => tag.serialize(),
44+
}
45+
}
46+
}

rtmp/src/flv/mod_ex.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
use bytes::Bytes;
2+
3+
use crate::error::FlvVideoTagParseError;
4+
5+
use super::ex_video::ExVideoPacketType;
6+
7+
/// Enhanced RTMP ModEx sub-type for video packets.
///
/// Read from the high 4 bits of the byte that follows each ModEx payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum VideoPacketModExType {
    /// Nanosecond precision timestamp offset (UI24, max 999,999 ns).
    TimestampOffsetNano,
}
13+
14+
impl VideoPacketModExType {
15+
fn from_raw(value: u8) -> Result<Self, FlvVideoTagParseError> {
16+
match value {
17+
0 => Ok(Self::TimestampOffsetNano),
18+
_ => Err(FlvVideoTagParseError::UnknownVideoPacketModExType(value)),
19+
}
20+
}
21+
22+
#[allow(unused)]
23+
fn into_raw(self) -> u8 {
24+
match self {
25+
Self::TimestampOffsetNano => 0,
26+
}
27+
}
28+
}
29+
30+
/// Result of resolving ModEx prefixes from the wire.
31+
pub(super) struct ModExResult {
32+
pub packet_type: ExVideoPacketType,
33+
pub remaining: Bytes,
34+
pub timestamp_offset_nanos: Option<u32>,
35+
}
36+
37+
/// Processes the ModEx prefix loop, returning the resolved packet type,
38+
/// remaining data, and any collected modifiers (e.g. nanosecond timestamp offset).
39+
///
40+
/// Each ModEx iteration:
41+
/// 1. UI8 + 1 data size (if 256, use UI16 + 1)
42+
/// 2. ModEx data payload
43+
/// 3. `[VideoPacketModExType(4 bits) | ExVideoPacketType(4 bits)]`
44+
/// 4. Interpret data based on ModExType, then check if PacketType is another ModEx.
45+
pub(super) fn resolve_mod_ex(data: Bytes) -> Result<ModExResult, FlvVideoTagParseError> {
46+
let mut offset: usize = 0;
47+
let mut timestamp_offset_nanos: Option<u32> = None;
48+
49+
loop {
50+
// Read ModEx data size: UI8 + 1 (range 1..=256)
51+
if data.len() < offset + 1 {
52+
return Err(FlvVideoTagParseError::TooShort);
53+
}
54+
let mut mod_ex_data_size = data[offset] as usize + 1;
55+
offset += 1;
56+
57+
if mod_ex_data_size == 256 {
58+
if data.len() < offset + 2 {
59+
return Err(FlvVideoTagParseError::TooShort);
60+
}
61+
mod_ex_data_size = u16::from_be_bytes([data[offset], data[offset + 1]]) as usize + 1;
62+
offset += 2;
63+
}
64+
65+
if data.len() < offset + mod_ex_data_size {
66+
return Err(FlvVideoTagParseError::TooShort);
67+
}
68+
let mod_ex_data_start = offset;
69+
offset += mod_ex_data_size;
70+
71+
// Next byte: [VideoPacketModExType(4 bits) | ExVideoPacketType(4 bits)]
72+
if data.len() < offset + 1 {
73+
return Err(FlvVideoTagParseError::TooShort);
74+
}
75+
let mod_ex_type = VideoPacketModExType::from_raw((data[offset] & 0b11110000) >> 4)?;
76+
let next_packet_type = ExVideoPacketType::from_raw(data[offset] & 0b00001111)?;
77+
offset += 1;
78+
79+
match mod_ex_type {
80+
VideoPacketModExType::TimestampOffsetNano => {
81+
let mod_ex_data = &data[mod_ex_data_start..mod_ex_data_start + mod_ex_data_size];
82+
if mod_ex_data.len() >= 3 {
83+
timestamp_offset_nanos = Some(u32::from_be_bytes([
84+
0,
85+
mod_ex_data[0],
86+
mod_ex_data[1],
87+
mod_ex_data[2],
88+
]));
89+
}
90+
}
91+
}
92+
93+
if next_packet_type != ExVideoPacketType::ModEx {
94+
return Ok(ModExResult {
95+
packet_type: next_packet_type,
96+
remaining: data.slice(offset..),
97+
timestamp_offset_nanos,
98+
});
99+
}
100+
}
101+
}

0 commit comments

Comments
 (0)