|
| 1 | +use bytes::Bytes; |
| 2 | + |
| 3 | +use crate::{RtmpMessageSerializeError, error::FlvVideoTagParseError}; |
| 4 | + |
| 5 | +use super::mod_ex::resolve_mod_ex; |
| 6 | +use super::video::{VideoTagFrameType, parse_composition_time}; |
| 7 | + |
| 8 | +/// Parsed Enhanced RTMP video tag. |
| 9 | +#[derive(Debug, Clone)] |
| 10 | +pub enum ExVideoTag { |
| 11 | + StartSeek, |
| 12 | + EndSeek, |
| 13 | + VideoBody { |
| 14 | + four_cc: ExVideoFourCc, |
| 15 | + packet: ExVideoPacket, |
| 16 | + frame_type: VideoTagFrameType, |
| 17 | + timestamp_offset_nanos: Option<u32>, |
| 18 | + }, |
| 19 | +} |
| 20 | + |
| 21 | +impl ExVideoTag { |
| 22 | + pub fn frame_type(&self) -> VideoTagFrameType { |
| 23 | + match self { |
| 24 | + ExVideoTag::StartSeek | ExVideoTag::EndSeek => { |
| 25 | + VideoTagFrameType::VideoInfoOrCommandFrame |
| 26 | + } |
| 27 | + ExVideoTag::VideoBody { frame_type, .. } => *frame_type, |
| 28 | + } |
| 29 | + } |
| 30 | +} |
| 31 | + |
/// FourCC video codec identifiers for Enhanced RTMP.
///
/// The type is a plain, field-less `Copy` enum, so it implements `Eq` and
/// `Hash` in addition to `PartialEq`; this allows it to be used directly as a
/// `HashMap` / `HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ExVideoFourCc {
    /// VP8 (`vp08`)
    Vp08,
    /// VP9 (`vp09`)
    Vp09,
    /// AV1 (`av01`)
    Av01,
    /// H.264/AVC (`avc1`)
    Avc1,
    /// H.265/HEVC (`hvc1`)
    Hvc1,
}
| 46 | + |
| 47 | +impl ExVideoFourCc { |
| 48 | + fn from_raw(bytes: [u8; 4]) -> Result<Self, FlvVideoTagParseError> { |
| 49 | + match &bytes { |
| 50 | + b"vp08" => Ok(Self::Vp08), |
| 51 | + b"vp09" => Ok(Self::Vp09), |
| 52 | + b"av01" => Ok(Self::Av01), |
| 53 | + b"avc1" => Ok(Self::Avc1), |
| 54 | + b"hvc1" => Ok(Self::Hvc1), |
| 55 | + _ => Err(FlvVideoTagParseError::UnknownVideoFourCc(bytes)), |
| 56 | + } |
| 57 | + } |
| 58 | + |
| 59 | + #[allow(unused)] |
| 60 | + fn to_raw(self) -> [u8; 4] { |
| 61 | + match self { |
| 62 | + Self::Vp08 => *b"vp08", |
| 63 | + Self::Vp09 => *b"vp09", |
| 64 | + Self::Av01 => *b"av01", |
| 65 | + Self::Avc1 => *b"avc1", |
| 66 | + Self::Hvc1 => *b"hvc1", |
| 67 | + } |
| 68 | + } |
| 69 | + |
| 70 | + /// Returns true if this codec uses SI24 CompositionTime in CodedFrames. |
| 71 | + /// Per the spec, only AVC and HEVC carry composition time offset. |
| 72 | + fn has_composition_time(self) -> bool { |
| 73 | + matches!(self, Self::Avc1 | Self::Hvc1) |
| 74 | + } |
| 75 | +} |
| 76 | + |
| 77 | +/// Semantic video packet type after parsing. |
| 78 | +/// |
| 79 | +/// This represents the resolved body content. Wire-only signals (`ModEx`, `Multitrack`) |
| 80 | +/// are handled during parsing and do not appear here. `CodedFrames` and `CodedFramesX` |
| 81 | +/// from the wire are unified — `CodedFramesX` sets `composition_time = 0`. |
| 82 | +#[derive(Debug, Clone)] |
| 83 | +pub enum ExVideoPacket { |
| 84 | + /// Decoder configuration record (SPS/PPS, VPS, etc.) |
| 85 | + SequenceStart(Bytes), |
| 86 | + /// Video frame data with composition time offset. |
| 87 | + /// For codecs without composition time (VP8, VP9, AV1), `composition_time` is 0. |
| 88 | + /// Encompasses both wire types `CodedFrames` (explicit SI24) and `CodedFramesX` (implicit 0). |
| 89 | + CodedFrames { composition_time: i32, data: Bytes }, |
| 90 | + /// End of sequence marker. No payload. |
| 91 | + SequenceEnd, |
| 92 | + /// AMF-encoded metadata (e.g. HDR colorInfo). |
| 93 | + Metadata(Bytes), |
| 94 | + /// Carriage of bitstream in MPEG-2 TS format. |
| 95 | + /// Mutually exclusive with SequenceStart. |
| 96 | + Mpeg2TsSequenceStart(Bytes), |
| 97 | +} |
| 98 | + |
| 99 | +#[derive(Debug, Clone, Copy, PartialEq)] |
| 100 | +pub(super) enum ExVideoPacketType { |
| 101 | + SequenceStart, |
| 102 | + CodedFrames, |
| 103 | + SequenceEnd, |
| 104 | + CodedFramesX, |
| 105 | + Metadata, |
| 106 | + Mpeg2TsSequenceStart, |
| 107 | + Multitrack, |
| 108 | + ModEx, |
| 109 | +} |
| 110 | + |
| 111 | +impl ExVideoPacketType { |
| 112 | + pub(super) fn from_raw(value: u8) -> Result<Self, FlvVideoTagParseError> { |
| 113 | + match value { |
| 114 | + 0 => Ok(Self::SequenceStart), |
| 115 | + 1 => Ok(Self::CodedFrames), |
| 116 | + 2 => Ok(Self::SequenceEnd), |
| 117 | + 3 => Ok(Self::CodedFramesX), |
| 118 | + 4 => Ok(Self::Metadata), |
| 119 | + 5 => Ok(Self::Mpeg2TsSequenceStart), |
| 120 | + 6 => Ok(Self::Multitrack), |
| 121 | + 7 => Ok(Self::ModEx), |
| 122 | + _ => Err(FlvVideoTagParseError::UnknownExVideoPacketType(value)), |
| 123 | + } |
| 124 | + } |
| 125 | + |
| 126 | + fn into_raw(self) -> u8 { |
| 127 | + match self { |
| 128 | + Self::SequenceStart => 0, |
| 129 | + Self::CodedFrames => 1, |
| 130 | + Self::SequenceEnd => 2, |
| 131 | + Self::CodedFramesX => 3, |
| 132 | + Self::Metadata => 4, |
| 133 | + Self::Mpeg2TsSequenceStart => 5, |
| 134 | + Self::Multitrack => 6, |
| 135 | + Self::ModEx => 7, |
| 136 | + } |
| 137 | + } |
| 138 | +} |
| 139 | + |
| 140 | +impl ExVideoTag { |
| 141 | + /// Parses Enhanced RTMP video tag. |
| 142 | + /// First byte: `[isExHeader(1) | VideoFrameType(3 bits) | VideoPacketType(4 bits)]` |
| 143 | + pub fn parse(data: Bytes) -> Result<Self, FlvVideoTagParseError> { |
| 144 | + if data.is_empty() { |
| 145 | + return Err(FlvVideoTagParseError::TooShort); |
| 146 | + } |
| 147 | + |
| 148 | + let frame_type = VideoTagFrameType::from_raw((data[0] & 0b01110000) >> 4)?; |
| 149 | + let packet_type = ExVideoPacketType::from_raw(data[0] & 0b00001111)?; |
| 150 | + |
| 151 | + // Process ModEx to resolve the final packet type and collect modifiers. |
| 152 | + let (packet_type, rest, timestamp_offset_nanos) = if packet_type == ExVideoPacketType::ModEx |
| 153 | + { |
| 154 | + let result = resolve_mod_ex(data.slice(1..))?; |
| 155 | + ( |
| 156 | + result.packet_type, |
| 157 | + result.remaining, |
| 158 | + result.timestamp_offset_nanos, |
| 159 | + ) |
| 160 | + } else { |
| 161 | + (packet_type, data.slice(1..), None) |
| 162 | + }; |
| 163 | + |
| 164 | + // Per spec: if frame_type is Command and packet_type is not Metadata, |
| 165 | + // the payload is a single UI8 VideoCommand with no FourCC or video body. |
| 166 | + if frame_type == VideoTagFrameType::VideoInfoOrCommandFrame |
| 167 | + && packet_type != ExVideoPacketType::Metadata |
| 168 | + { |
| 169 | + if rest.is_empty() { |
| 170 | + return Err(FlvVideoTagParseError::TooShort); |
| 171 | + } |
| 172 | + let content = match rest[0] { |
| 173 | + 0 => ExVideoTag::StartSeek, |
| 174 | + 1 => ExVideoTag::EndSeek, |
| 175 | + v => return Err(FlvVideoTagParseError::UnknownVideoCommand(v)), |
| 176 | + }; |
| 177 | + return Ok(content); |
| 178 | + } |
| 179 | + |
| 180 | + // Read FourCC (4 bytes), present for all non-command packet types. |
| 181 | + if rest.len() < 4 { |
| 182 | + return Err(FlvVideoTagParseError::TooShort); |
| 183 | + } |
| 184 | + let four_cc = ExVideoFourCc::from_raw([rest[0], rest[1], rest[2], rest[3]])?; |
| 185 | + let body_data = rest.slice(4..); |
| 186 | + |
| 187 | + let packet = match packet_type { |
| 188 | + ExVideoPacketType::SequenceStart => ExVideoPacket::SequenceStart(body_data), |
| 189 | + ExVideoPacketType::CodedFrames => Self::parse_coded_frames(body_data, four_cc)?, |
| 190 | + ExVideoPacketType::CodedFramesX => ExVideoPacket::CodedFrames { |
| 191 | + composition_time: 0, |
| 192 | + data: body_data, |
| 193 | + }, |
| 194 | + ExVideoPacketType::SequenceEnd => ExVideoPacket::SequenceEnd, |
| 195 | + ExVideoPacketType::Metadata => ExVideoPacket::Metadata(body_data), |
| 196 | + ExVideoPacketType::Mpeg2TsSequenceStart => { |
| 197 | + ExVideoPacket::Mpeg2TsSequenceStart(body_data) |
| 198 | + } |
| 199 | + ExVideoPacketType::Multitrack => { |
| 200 | + // TODO: implement multitrack parsing (AvMultitrackType + per-track FourCC/data) |
| 201 | + return Err(FlvVideoTagParseError::UnsupportedPacketType( |
| 202 | + packet_type.into_raw(), |
| 203 | + )); |
| 204 | + } |
| 205 | + ExVideoPacketType::ModEx => { |
| 206 | + unreachable!("ModEx should have been resolved above") |
| 207 | + } |
| 208 | + }; |
| 209 | + |
| 210 | + Ok(ExVideoTag::VideoBody { |
| 211 | + four_cc, |
| 212 | + packet, |
| 213 | + frame_type, |
| 214 | + timestamp_offset_nanos, |
| 215 | + }) |
| 216 | + } |
| 217 | + |
| 218 | + /// Parses CodedFrames body. AVC and HEVC include an SI24 composition |
| 219 | + /// time prefix; other codecs do not (composition_time is set to 0 |
| 220 | + /// in the parsed representation). |
| 221 | + fn parse_coded_frames( |
| 222 | + data: Bytes, |
| 223 | + four_cc: ExVideoFourCc, |
| 224 | + ) -> Result<ExVideoPacket, FlvVideoTagParseError> { |
| 225 | + if four_cc.has_composition_time() { |
| 226 | + if data.len() < 3 { |
| 227 | + return Err(FlvVideoTagParseError::TooShort); |
| 228 | + } |
| 229 | + let composition_time = parse_composition_time(&data[0..3]); |
| 230 | + Ok(ExVideoPacket::CodedFrames { |
| 231 | + composition_time, |
| 232 | + data: data.slice(3..), |
| 233 | + }) |
| 234 | + } else { |
| 235 | + Ok(ExVideoPacket::CodedFrames { |
| 236 | + composition_time: 0, |
| 237 | + data, |
| 238 | + }) |
| 239 | + } |
| 240 | + } |
| 241 | + |
| 242 | + pub fn serialize(&self) -> Result<Bytes, RtmpMessageSerializeError> { |
| 243 | + unimplemented!() |
| 244 | + } |
| 245 | +} |
0 commit comments