From 0dc08fe42e3a3b1a4e1ec85eeb3a0089b513d15e Mon Sep 17 00:00:00 2001 From: Deepnarayan Sett Date: Sat, 23 Aug 2025 19:53:57 +0530 Subject: [PATCH 1/5] Ported ES Module to Rust --- src/lib_ccx/ccx_decoders_vbi.h | 1 - src/lib_ccx/es_functions.c | 6 + src/lib_ccx/lib_ccx.h | 2 + src/rust/build.rs | 5 + src/rust/lib_ccxr/src/activity.rs | 25 + src/rust/lib_ccxr/src/common/bitstream.rs | 131 ++++- src/rust/lib_ccxr/src/time/units.rs | 12 +- src/rust/src/es/core.rs | 226 +++++++++ src/rust/src/es/eau.rs | 143 ++++++ src/rust/src/es/gop.rs | 211 ++++++++ src/rust/src/es/mod.rs | 62 +++ src/rust/src/es/pic.rs | 339 +++++++++++++ src/rust/src/es/seq.rs | 214 +++++++++ src/rust/src/es/userdata.rs | 555 ++++++++++++++++++++++ src/rust/src/lib.rs | 7 +- src/rust/src/libccxr_exports/time.rs | 8 +- 16 files changed, 1934 insertions(+), 13 deletions(-) create mode 100644 src/rust/src/es/core.rs create mode 100644 src/rust/src/es/eau.rs create mode 100644 src/rust/src/es/gop.rs create mode 100644 src/rust/src/es/mod.rs create mode 100644 src/rust/src/es/pic.rs create mode 100644 src/rust/src/es/seq.rs create mode 100644 src/rust/src/es/userdata.rs diff --git a/src/lib_ccx/ccx_decoders_vbi.h b/src/lib_ccx/ccx_decoders_vbi.h index 08ca2efba..b7752c2fe 100644 --- a/src/lib_ccx/ccx_decoders_vbi.h +++ b/src/lib_ccx/ccx_decoders_vbi.h @@ -25,5 +25,4 @@ struct ccx_decoder_vbi_ctx }; -int decode_vbi(struct lib_cc_decode *dec_ctx, uint8_t field, unsigned char *buffer, size_t len, struct cc_subtitle *sub); #endif diff --git a/src/lib_ccx/es_functions.c b/src/lib_ccx/es_functions.c index 5da94cd39..ccd4ff44a 100644 --- a/src/lib_ccx/es_functions.c +++ b/src/lib_ccx/es_functions.c @@ -22,8 +22,14 @@ static int read_pic_data(struct bitstream *esstream); /* Process a mpeg-2 data stream with "length" bytes in buffer "data". * The number of processed bytes is returned. * Defined in ISO/IEC 13818-2 6.2 */ +#ifndef DISABLE_RUST +size_t ccxr_process_m2v(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, unsigned char *data, size_t length, struct cc_subtitle *sub); +#endif size_t process_m2v(struct encoder_ctx *enc_ctx, struct lib_cc_decode *dec_ctx, unsigned char *data, size_t length, struct cc_subtitle *sub) { +#ifndef DISABLE_RUST + return ccxr_process_m2v(enc_ctx, dec_ctx, data, length, sub); +#endif if (length < 8) // Need to look ahead 8 bytes return length; diff --git a/src/lib_ccx/lib_ccx.h b/src/lib_ccx/lib_ccx.h index a765ae8f9..32bc66d10 100644 --- a/src/lib_ccx/lib_ccx.h +++ b/src/lib_ccx/lib_ccx.h @@ -329,4 +329,6 @@ int process_non_multiprogram_general_loop(struct lib_ccx_ctx* ctx, int ret, int *caps); void segment_output_file(struct lib_ccx_ctx *ctx, struct lib_cc_decode *dec_ctx); +int decode_vbi(struct lib_cc_decode *dec_ctx, uint8_t field, unsigned char *buffer, size_t len, struct cc_subtitle *sub); + #endif diff --git a/src/rust/build.rs b/src/rust/build.rs index a2005dd65..ee475397c 100644 --- a/src/rust/build.rs +++ b/src/rust/build.rs @@ -13,6 +13,11 @@ fn main() { "version", "set_binary_mode", "net_send_header", // shall be removed after NET + "process_hdcc", + "anchor_hdcc", + "store_hdcc", + "do_cb", + "decode_vbi", "write_spumux_footer", "write_spumux_header", ]); diff --git a/src/rust/lib_ccxr/src/activity.rs b/src/rust/lib_ccxr/src/activity.rs index 5ef55134c..98af74cc2 100644 --- a/src/rust/lib_ccxr/src/activity.rs +++ b/src/rust/lib_ccxr/src/activity.rs @@ -5,6 +5,13 @@ use crate::common::Options; pub trait ActivityExt { fn activity_report_version(&mut self); + fn activity_video_info( + &mut self, + hor_size: u32, + vert_size: u32, + aspect_ratio: &str, + framerate: &str, + ); } impl ActivityExt for Options { fn activity_report_version(&mut self) { @@ -15,4 +22,22 @@ impl ActivityExt for Options { stderr.flush().unwrap(); } } + fn activity_video_info( + &mut self, + hor_size: u32, + vert_size: u32, + aspect_ratio: &str, + framerate: &str, + ) { + if self.gui_mode_reports { + let mut stderr = io::stderr(); + writeln!( + stderr, + "###VIDEOINFO#{}#{}#{}#{}", + hor_size, vert_size, aspect_ratio, framerate + ) + .unwrap(); + stderr.flush().unwrap(); + } + } } diff --git a/src/rust/lib_ccxr/src/common/bitstream.rs b/src/rust/lib_ccxr/src/common/bitstream.rs index ba873e793..7b3d22be6 100644 --- a/src/rust/lib_ccxr/src/common/bitstream.rs +++ b/src/rust/lib_ccxr/src/common/bitstream.rs @@ -1,5 +1,6 @@ use crate::fatal; use crate::util::log::ExitCause; +use crate::util::log::{debug, DebugMessageFlag}; use thiserror::Error; #[derive(Debug, Error)] @@ -23,7 +24,12 @@ pub struct BitStreamRust<'a> { pub _i_pos: usize, pub _i_bpos: u8, } - +#[macro_export] +macro_rules! dbg_es { + ($($args:expr),*) => { + debug!(msg_type = DebugMessageFlag::VERBOSE; "{}", format!($($args),*)) + }; +} impl<'a> BitStreamRust<'a> { /// Create a new bitstream. Empty data is allowed (bits_left = 0). pub fn new(data: &'a [u8]) -> Result { @@ -333,6 +339,129 @@ impl<'a> BitStreamRust<'a> { res } + // Return the next startcode or sequence_error_code if not enough + // data was left in the bitstream. Also set esstream->bitsleft. + // The bitstream pointer shall be moved to the begin of the start + // code if found, or to the position where a search would continue + // would more data be made available. + // This function discards all data until the start code is + // found + pub fn search_start_code(&mut self) -> Result { + self.make_byte_aligned()?; + + // Keep a negative esstream->bitsleft, but correct it. + if self.bits_left <= 0 { + dbg_es!("search_start_code: bitsleft <= 0"); + self.bits_left -= 8 * 4; + return Ok(0xB4); + } + + let mut tpos = self.pos; + + // Scan for 0x000001xx in header + loop { + // Find next 0x00 byte + let remaining_data = &self.data[tpos..]; + if let Some(zero_offset) = remaining_data.iter().position(|&b| b == 0x00) { + tpos += zero_offset; + } else { + // We don't even have the starting 0x00 + tpos = self.data.len(); + self.bits_left = -8 * 4; + break; + } + + if tpos + 3 >= self.data.len() { + // Not enough bytes left to check for 0x000001?? + self.bits_left = 8 * (self.data.len() as i64 - (tpos + 4) as i64); + break; + } else if self.data[tpos + 1] == 0x00 && self.data[tpos + 2] == 0x01 { + // Found 0x000001?? + self.bits_left = 8 * (self.data.len() as i64 - (tpos + 4) as i64); + break; + } + // Keep searching + tpos += 1; + } + + self.pos = tpos; + if self.bits_left < 0 { + dbg_es!("search_start_code: bitsleft <= 0"); + Ok(0xB4) + } else { + dbg_es!("search_start_code: Found {:02X}", self.data[tpos + 3]); + Ok(self.data[tpos + 3]) + } + } + + // Return the next startcode or sequence_error_code if not enough + // data was left in the bitstream. Also set esstream->bitsleft. + // The bitstream pointer shall be moved to the begin of the start + // code if found, or to the position where a search would continue + // would more data be made available. + // Only NULL bytes before the start code are discarded, if a non + // NULL byte is encountered esstream->error is set to TRUE and the + // function returns sequence_error_code with the pointer set after + // that byte. + pub fn next_start_code(&mut self) -> Result { + if self.error || self.bits_left < 0 { + return Ok(0xB4); + } + + self.make_byte_aligned()?; + + // Only start looking if there is enough data. Adjust bitsleft. + if self.bits_left < 4 * 8 { + dbg_es!("next_start_code: bitsleft {} < 32", self.bits_left); + self.bits_left -= 8 * 4; + return Ok(0xB4); + } + + let mut tmp: u8; + while (self.bitstream_get_num(4, false)? & 0x00FFFFFF) != 0x00010000 // LSB 0x000001?? + && self.bits_left > 0 + { + tmp = self.bitstream_get_num(1, true)? as u8; + if tmp != 0 { + dbg_es!("next_start_code: Non zero stuffing"); + self.error = true; + return Ok(0xB4); + } + } + + if self.bits_left < 8 { + self.bits_left -= 8; + dbg_es!("next_start_code: bitsleft <= 0"); + Ok(0xB4) + } else { + dbg_es!("next_start_code: Found {:02X}", self.data[self.pos + 3]); + + if self.data[self.pos + 3] == 0xB4 { + dbg_es!("B4: assume bitstream syntax error!"); + self.error = true; + } + + Ok(self.data[self.pos + 3]) + } + } + pub fn init_bitstream(&mut self, start: usize, end: usize) -> Result<(), BitstreamError> { + if start > end || end > self.data.len() { + return Err(BitstreamError::InsufficientData); + } + + self.pos = start; + self.bpos = 8; + self.bits_left = (end - start) as i64 * 8; + self.error = false; + self._i_pos = 0; + self._i_bpos = 0; + + if self.bits_left < 0 { + return Err(BitstreamError::NegativeLength); + } + + Ok(()) + } } #[cfg(test)] mod tests { diff --git a/src/rust/lib_ccxr/src/time/units.rs b/src/rust/lib_ccxr/src/time/units.rs index 16714d142..d479ca6f5 100644 --- a/src/rust/lib_ccxr/src/time/units.rs +++ b/src/rust/lib_ccxr/src/time/units.rs @@ -537,12 +537,12 @@ impl FrameCount { /// [`Timestamp`] instead of the other format. #[derive(Copy, Clone, Debug)] pub struct GopTimeCode { - drop_frame: bool, - time_code_hours: u8, - time_code_minutes: u8, - time_code_seconds: u8, - time_code_pictures: u8, - timestamp: Timestamp, + pub drop_frame: bool, + pub time_code_hours: u8, + pub time_code_minutes: u8, + pub time_code_seconds: u8, + pub time_code_pictures: u8, + pub timestamp: Timestamp, } impl GopTimeCode { diff --git a/src/rust/src/es/core.rs b/src/rust/src/es/core.rs new file mode 100644 index 000000000..c388dd251 --- /dev/null +++ b/src/rust/src/es/core.rs @@ -0,0 +1,226 @@ +use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode}; +use crate::es::eau::read_eau_info; +use crate::es::gop::read_gop_info; +use crate::es::pic::{read_pic_data, read_pic_info}; +use crate::es::seq::read_seq_info; +use lib_ccxr::common::{BitStreamRust, BitstreamError, Options}; +use lib_ccxr::dbg_es; +use lib_ccxr::util::log::DebugMessageFlag; +use lib_ccxr::{debug, info}; +use std::slice; +/* Process a mpeg-2 data stream with "length" bytes in buffer "data". + * The number of processed bytes is returned. + * Defined in ISO/IEC 13818-2 6.2 */ +pub fn process_m2v( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + data: &[u8], + length: usize, + sub: &mut cc_subtitle, + ccx_options: &mut Options, +) -> Result { + if length < 8 { + // Need to look ahead 8 bytes + return Ok(length); + } + + // Init bitstream + let mut esstream = BitStreamRust::new(data)?; + esstream.init_bitstream(0, length)?; + + // Process data. The return value is ignored as esstream.pos holds + // the information how far the parsing progressed. + let _ = es_video_sequence(enc_ctx, dec_ctx, &mut esstream, sub, ccx_options)?; + + // This returns how many bytes were processed and can therefore + // be discarded from "buffer". "esstream.pos" points to the next byte + // where processing will continue. + Ok(esstream.pos) +} + +// Return TRUE if the video sequence was finished, FALSE +// Otherwise. estream->pos shall point to the position where +// the next call will continue, i.e. the possible begin of an +// unfinished video sequence or after the finished sequence. +pub fn es_video_sequence( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + sub: &mut cc_subtitle, + ccx_options: &mut Options, +) -> Result { + // Avoid "Skip forward" message on first call and later only + // once per search. + static mut NOSKIPMESSAGE: bool = true; + + dbg_es!("es_video_sequence()\n"); + + esstream.error = false; + + // Analyze sequence header ... + if dec_ctx.no_bitstream_error == 0 { + // We might start here because of a syntax error. Discard + // all data until a new sequence_header_code or group_start_code + // is found. + + unsafe { + if !NOSKIPMESSAGE { + // Avoid unnecessary output. + info!("\nSkip forward to the next Sequence or GOP start.\n"); + } else { + NOSKIPMESSAGE = false; + } + } + + loop { + // search_start_code() cannot produce esstream->error + let startcode = esstream.search_start_code()?; + if esstream.bits_left < 0 { + unsafe { + NOSKIPMESSAGE = true; + } + return Ok(false); + } + + if startcode == 0xB3 || startcode == 0xB8 { + // found it + break; + } + + esstream.skip_bits(4 * 8)?; + } + + dec_ctx.no_bitstream_error = 1; + dec_ctx.saw_seqgoppic = 0; + dec_ctx.in_pic_data = 0; + } + + loop { + let startcode = esstream.next_start_code()?; + + dbg_es!( + "\nM2V - next start code {:02X} {}\n", + startcode, + dec_ctx.in_pic_data + ); + + // Syntax check - also returns on bitsleft < 0 + if startcode == 0xB4 { + if esstream.error { + dec_ctx.no_bitstream_error = 0; + dbg_es!("es_video_sequence: syntax problem.\n"); + } + + dbg_es!("es_video_sequence: return on B4 startcode.\n"); + + return Ok(false); + } + + // Sequence_end_code + if startcode == 0xB7 { + esstream.skip_bits(32)?; // Advance bitstream + dec_ctx.no_bitstream_error = 0; + break; // Exit the main loop - sequence is complete + } + // Sequence header + else if dec_ctx.in_pic_data == 0 && startcode == 0xB3 { + if !read_seq_info(dec_ctx, esstream, ccx_options)? { + if esstream.error { + dec_ctx.no_bitstream_error = 0; + } + return Ok(false); + } + dec_ctx.saw_seqgoppic = 1; + continue; // Continue to next iteration + } + // Group of pictures + else if dec_ctx.in_pic_data == 0 && startcode == 0xB8 { + if !unsafe { read_gop_info(enc_ctx, dec_ctx, esstream, sub)? } { + if esstream.error { + dec_ctx.no_bitstream_error = 0; + } + return Ok(false); + } + dec_ctx.saw_seqgoppic = 2; + continue; // Continue to next iteration + } + // Picture + else if dec_ctx.in_pic_data == 0 && startcode == 0x00 { + if !unsafe { read_pic_info(enc_ctx, dec_ctx, esstream, sub)? } { + if esstream.error { + dec_ctx.no_bitstream_error = 0; + } + return Ok(false); + } + dec_ctx.saw_seqgoppic = 3; + dec_ctx.in_pic_data = 1; + continue; // Continue to next iteration + } + // Only looks for extension and user data if we saw sequence, gop + // or picture info before. + // This check needs to be before the "dec_ctx->in_pic_data" part. + else if dec_ctx.saw_seqgoppic > 0 && (startcode == 0xB2 || startcode == 0xB5) { + if !read_eau_info(enc_ctx, dec_ctx, esstream, dec_ctx.saw_seqgoppic - 1, sub)? { + if esstream.error { + dec_ctx.no_bitstream_error = 0; + } + return Ok(false); + } + dec_ctx.saw_seqgoppic = 0; + continue; // Continue to next iteration + } else if dec_ctx.in_pic_data != 0 { + // See comment in read_pic_data() + if !read_pic_data(esstream)? { + if esstream.error { + dec_ctx.no_bitstream_error = 0; + } + return Ok(false); + } + dec_ctx.saw_seqgoppic = 0; + dec_ctx.in_pic_data = 0; + continue; // Continue to next iteration + } else { + // Nothing found - bitstream error + if startcode == 0xBA { + info!("\nFound PACK header in ES data. Probably wrong stream mode!\n"); + } else { + info!("\nUnexpected startcode: {:02X}\n", startcode); + } + dec_ctx.no_bitstream_error = 0; + return Ok(false); + } + } + + Ok(true) +} +/// Temporarily placed here, import from ts_core when ts module is merged +/// # Safety +/// This function is unsafe because it may dereference a raw pointer. +pub unsafe fn dump(start: *const u8, l: i32, abs_start: u64, clear_high_bit: u32) { + let data = slice::from_raw_parts(start, l as usize); + + let mut x = 0; + while x < l { + info!("{:08} | ", x + abs_start as i32); + + for j in 0..16 { + if x + j < l { + info!("{:02X} ", data[(x + j) as usize]); + } else { + info!(" "); + } + } + info!(" | "); + + for j in 0..16 { + if x + j < l && data[(x + j) as usize] >= b' ' { + let ch = data[(x + j) as usize] & (if clear_high_bit != 0 { 0x7F } else { 0xFF }); + info!("{}", ch as char); + } else { + info!(" "); + } + } + info!("\n"); + x += 16; + } +} diff --git a/src/rust/src/es/eau.rs b/src/rust/src/es/eau.rs new file mode 100644 index 000000000..41ddfd927 --- /dev/null +++ b/src/rust/src/es/eau.rs @@ -0,0 +1,143 @@ +use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode}; +use crate::es::userdata::user_data; +use lib_ccxr::common::{BitStreamRust, BitstreamError}; +use lib_ccxr::dbg_es; +use lib_ccxr::util::log::{DebugMessageFlag, ExitCause}; +use lib_ccxr::{debug, fatal, info}; + +// Return TRUE if all was read. FALSE if a problem occurred: +// If a bitstream syntax problem occurred the bitstream will +// point to after the problem, in case we run out of data the bitstream +// will point to where we want to restart after getting more. +pub fn read_eau_info( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + udtype: i32, + sub: &mut cc_subtitle, +) -> Result { + dbg_es!("Read Extension and User Info\n"); + + // We only get here after seeing that start code + let tst = esstream.next_bytes(4)?; + if tst.len() < 4 + || tst[0] != 0x00 + || tst[1] != 0x00 + || tst[2] != 0x01 + || (tst[3] != 0xB2 && tst[3] != 0xB5) + { + // (0x000001 B2||B5) + fatal!(cause = ExitCause::Bug; "In read_eau_info: Impossible values for tst. Please file a bug report on GitHub.\n"); + } + + // The following extension_and_user_data() function makes sure that + // user data is not evaluated twice. Should the function run out of + // data it will make sure that esstream points to where we want to + // continue after getting more. + if !extension_and_user_data(enc_ctx, dec_ctx, esstream, udtype, sub)? { + if esstream.error { + dbg_es!("\nWarning: Retry while reading Extension and User Data!\n"); + } else { + dbg_es!("\nBitstream problem while reading Extension and User Data!\n"); + } + + return Ok(false); + } + + dbg_es!("Read Extension and User Info - processed\n\n"); + + Ok(true) +} + +// Return TRUE if the data parsing finished, FALSE otherwise. +// estream->pos is advanced. Data is only processed if esstream->error +// is FALSE, parsing can set esstream->error to TRUE. +pub fn extension_and_user_data( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + udtype: i32, + sub: &mut cc_subtitle, +) -> Result { + dbg_es!("Extension and user data({})\n", udtype); + + if esstream.error || esstream.bits_left <= 0 { + return Ok(false); + } + + // Remember where to continue + let eau_start_pos = esstream.pos; + let eau_start_bpos = esstream.bpos; + + loop { + let startcode = esstream.next_start_code()?; + + if startcode == 0xB2 || startcode == 0xB5 { + // Skip u32 (advance bitstream) + esstream.skip_bits(32)?; + let dstart_pos = esstream.pos; + let dstart_bpos = esstream.bpos; + + // Advance esstream to the next startcode. Verify that + // the whole extension was available and discard blocks + // followed by PACK headers. The latter usually indicates + // a PS treated as an ES. + let nextstartcode = esstream.search_start_code()?; + if nextstartcode == 0xBA { + info!("\nFound PACK header in ES data. Probably wrong stream mode!\n"); + esstream.error = true; + return Ok(false); + } + + if esstream.error { + dbg_es!("Extension and user data - syntax problem\n"); + return Ok(false); + } + + if esstream.bits_left < 0 { + dbg_es!("Extension and user data - incomplete\n"); + // Restore to where we need to continue + esstream.init_bitstream(eau_start_pos, esstream.data.len())?; + esstream.bpos = eau_start_bpos; + esstream.bits_left = -1; // Redundant + return Ok(false); + } + + if startcode == 0xB2 { + let mut ustream = BitStreamRust::new(&esstream.data[dstart_pos..])?; + ustream.bpos = dstart_bpos; + ustream.bits_left = + (esstream.pos - dstart_pos) as i64 * 8 + (esstream.bpos - dstart_bpos) as i64; + unsafe { + user_data(enc_ctx, dec_ctx, &mut ustream, udtype, sub)?; + } + } else { + dbg_es!("Skip {} bytes extension data.\n", esstream.pos - dstart_pos); + } + // If we get here esstream points to the end of a block + // of extension or user data. Should we run out of data in + // this loop this is where we want to restart after getting more. + // eau_start = esstream->pos; (update for next iteration) + } else { + break; + } + } + + if esstream.error { + dbg_es!("Extension and user data - syntax problem\n"); + return Ok(false); + } + if esstream.bits_left < 0 { + dbg_es!("Extension and user data - incomplete\n"); + // Restore to where we need to continue + esstream.init_bitstream(eau_start_pos, esstream.data.len())?; + esstream.bpos = eau_start_bpos; + esstream.bits_left = -1; // Redundant + return Ok(false); + } + + dbg_es!("Extension and user data - processed\n"); + + // Read complete + Ok(true) +} diff --git a/src/rust/src/es/gop.rs b/src/rust/src/es/gop.rs new file mode 100644 index 000000000..b754f1e4b --- /dev/null +++ b/src/rust/src/es/gop.rs @@ -0,0 +1,211 @@ +use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode, process_hdcc}; +use crate::libccxr_exports::time::{ + ccxr_get_fts_max, ccxr_print_debug_timing, ccxr_set_current_pts, ccxr_set_fts, + write_gop_time_code, +}; +use crate::{ + ccx_options, current_fps, first_gop_time, frames_since_ref_time, fts_at_gop_start, gop_time, + total_frames_count, MPEG_CLOCK_FREQ, +}; +use lib_ccxr::common::{BitStreamRust, BitstreamError}; +use lib_ccxr::dbg_es; +use lib_ccxr::time::c_functions::{calculate_ms_gop_time, gop_accepted, print_mstime_static}; +use lib_ccxr::time::{GopTimeCode, Timestamp}; +use lib_ccxr::util::log::{DebugMessageFlag, ExitCause}; +use lib_ccxr::{debug, fatal, info}; + +// Return TRUE if all was read. FALSE if a problem occurred: +// If a bitstream syntax problem occurred the bitstream will +// point to after the problem, in case we run out of data the bitstream +// will point to where we want to restart after getting more. +/// # Safety +/// This function is unsafe because it calls `gop_header`. +pub unsafe fn read_gop_info( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + sub: &mut cc_subtitle, +) -> Result { + dbg_es!("Read GOP Info"); + + // We only get here after seeing that start code + if esstream.bitstream_get_num(4, false)? != 0xB8010000 { + // LSB first (0x000001B8) + fatal!(cause = ExitCause::Bug; "In read_gop_info: next_u32(esstream) != 0xB8010000. Please file a bug report on GitHub."); + } + + // If we get here esstream points to the start of a group_start_code + // should we run out of data in esstream this is where we want to restart + // after getting more. + let gop_info_start_pos = esstream.pos; + let gop_info_start_bpos = esstream.bpos; + + gop_header(enc_ctx, dec_ctx, esstream, sub)?; + // extension_and_user_data(esstream); + + if esstream.error { + return Ok(false); + } + + if esstream.bits_left < 0 { + esstream.init_bitstream(gop_info_start_pos, gop_info_start_bpos as usize)?; + return Ok(false); + } + + dbg_es!("Read GOP Info - processed\n"); + + Ok(true) +} + +// Return TRUE if the data parsing finished, FALSE otherwise. +// estream->pos is advanced. Data is only processed if esstream->error +// is FALSE, parsing can set esstream->error to TRUE. +unsafe fn gop_header( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + sub: &mut cc_subtitle, +) -> Result { + dbg_es!("GOP header"); + + if esstream.error || esstream.bits_left <= 0 { + return Ok(false); + } + + // We only get here after seeing that start code + if esstream.bitstream_get_num(4, true)? != 0xB8010000 { + // LSB first (0x000001B8) + fatal!(cause = ExitCause::Bug; "In gop_header: read_u32(esstream) != 0xB8010000. Please file a bug report on GitHub."); + } + + let drop_frame_flag = esstream.read_bits(1)? as u32; + let mut gtc = GopTimeCode { + drop_frame: drop_frame_flag != 0, + time_code_hours: esstream.read_bits(5)? as u8, + time_code_minutes: esstream.read_bits(6)? as u8, + time_code_seconds: 0, + time_code_pictures: 0, + timestamp: Default::default(), + }; + esstream.skip_bits(1)?; // Marker bit + gtc.time_code_seconds = esstream.read_bits(6)? as u8; + gtc.time_code_pictures = esstream.read_bits(6)? as u8; + calculate_ms_gop_time(gtc); + + if esstream.bits_left < 0 { + return Ok(false); + } + + if gop_accepted(gtc) { + // Do GOP padding during GOP header. The previous GOP and all + // included captions are written. Use the current GOP time to + // do the padding. + + // Flush buffered cc blocks before doing the housekeeping + if dec_ctx.has_ccdata_buffered != 0 { + process_hdcc(enc_ctx, dec_ctx, sub); + } + + // Last GOPs pulldown frames + if (dec_ctx.current_pulldownfields > 0) != (dec_ctx.pulldownfields > 0) { + dec_ctx.current_pulldownfields = dec_ctx.pulldownfields; + dbg_es!( + "Pulldown: {}", + if dec_ctx.pulldownfields != 0 { + "on" + } else { + "off" + } + ); + if dec_ctx.pulldownfields != 0 { + dbg_es!(" - {} fields in last GOP", dec_ctx.pulldownfields); + } + dbg_es!(""); + } + dec_ctx.pulldownfields = 0; + + // Report synchronization jumps between GOPs. Warn if there + // are 20% or more deviation. + if (ccx_options.debug_mask & 4 != 0) + && ((gtc.timestamp.millis() - gop_time.ms // more than 20% longer + > (dec_ctx.frames_since_last_gop as f64 * 1000.0 / current_fps * 1.2) as i64) + || (gtc.timestamp.millis() - gop_time.ms // or 20% shorter + < (dec_ctx.frames_since_last_gop as f64 * 1000.0 / current_fps * 0.8) as i64)) + && first_gop_time.inited != 0 + { + info!("\rWarning: Jump in GOP timing."); + info!( + " (old) {}", + print_mstime_static(Timestamp::from_millis(gop_time.ms), ':') + ); + info!( + " + {} ({}F)", + print_mstime_static( + Timestamp::from_millis( + (dec_ctx.frames_since_last_gop as f64 * 1000.0 / current_fps) as i64 + ), + ':' + ), + dec_ctx.frames_since_last_gop + ); + info!(" != (new) {}", print_mstime_static(gtc.timestamp, ':')); + } + + if first_gop_time.inited == 0 { + first_gop_time = write_gop_time_code(Some(gtc)); + + // It needs to be "+1" because the frame count starts at 0 and we + // need the length of all frames. + if total_frames_count == 0 { + // If this is the first frame there cannot be an offset + (*dec_ctx.timing).fts_fc_offset = 0; + // first_gop_time.ms stays unchanged + } else { + (*dec_ctx.timing).fts_fc_offset = + ((total_frames_count + 1) as f64 * 1000.0 / current_fps) as i64; + // Compensate for those written before + first_gop_time.ms -= (*dec_ctx.timing).fts_fc_offset; + } + + debug!(msg_type = DebugMessageFlag::TIME; "\nFirst GOP time: {:02}:{:02}:{:02}:{:03} {:+}ms", + gtc.time_code_hours, + gtc.time_code_minutes, + gtc.time_code_seconds, + (1000.0 * gtc.time_code_pictures as f64 / current_fps) as u32, + (*dec_ctx.timing).fts_fc_offset); + } + + gop_time = write_gop_time_code(Some(gtc)); + + dec_ctx.frames_since_last_gop = 0; + // Indicate that we read a gop header (since last frame number 0) + dec_ctx.saw_gop_header = 1; + + // If we use GOP timing, reconstruct the PTS from the GOP + if ccx_options.use_gop_as_pts == 1 { + ccxr_set_current_pts( + dec_ctx.timing, + gtc.timestamp.millis() * (MPEG_CLOCK_FREQ as i64 / 1000), + ); + (*dec_ctx.timing).current_tref = 0; + frames_since_ref_time = 0; + ccxr_set_fts(dec_ctx.timing); + fts_at_gop_start = ccxr_get_fts_max(dec_ctx.timing); + } else { + // FIXME: Wrong when PTS are not increasing but are identical + // throughout the GOP and then jump to the next time for the + // next GOP. + // This effect will also lead to captions being one GOP early + // for DVD captions. + fts_at_gop_start = ccxr_get_fts_max(dec_ctx.timing) + (1000.0 / current_fps) as i64; + } + + if ccx_options.debug_mask & 4 != 0 { + debug!(msg_type = DebugMessageFlag::TIME; "\nNew GOP:"); + debug!(msg_type = DebugMessageFlag::TIME; "\nDrop frame flag: {}:", drop_frame_flag); + ccxr_print_debug_timing(dec_ctx.timing); + } + } + + Ok(true) +} diff --git a/src/rust/src/es/mod.rs b/src/rust/src/es/mod.rs new file mode 100644 index 000000000..f5fd3869e --- /dev/null +++ b/src/rust/src/es/mod.rs @@ -0,0 +1,62 @@ +use crate::bindings::{ + cc_subtitle, ccx_frame_type, ccx_frame_type_CCX_FRAME_TYPE_B_FRAME, + ccx_frame_type_CCX_FRAME_TYPE_D_FRAME, ccx_frame_type_CCX_FRAME_TYPE_I_FRAME, + ccx_frame_type_CCX_FRAME_TYPE_P_FRAME, ccx_frame_type_CCX_FRAME_TYPE_RESET_OR_UNKNOWN, + encoder_ctx, lib_cc_decode, +}; +use crate::ccx_options; +use crate::encoder::FromCType; +use crate::es::core::process_m2v; +use lib_ccxr::common::{FrameType, Options}; + +pub mod core; +pub mod eau; +pub mod gop; +pub mod pic; +pub mod seq; +pub mod userdata; + +impl FromCType for FrameType { + // TODO move to ctorust.rs when demuxer is merged + unsafe fn from_ctype(c_value: ccx_frame_type) -> Option { + match c_value { + ccx_frame_type_CCX_FRAME_TYPE_RESET_OR_UNKNOWN => Some(FrameType::ResetOrUnknown), + ccx_frame_type_CCX_FRAME_TYPE_I_FRAME => Some(FrameType::IFrame), + ccx_frame_type_CCX_FRAME_TYPE_P_FRAME => Some(FrameType::PFrame), + ccx_frame_type_CCX_FRAME_TYPE_B_FRAME => Some(FrameType::BFrame), + ccx_frame_type_CCX_FRAME_TYPE_D_FRAME => Some(FrameType::DFrame), + _ => None, + } + } +} +/// # Safety +/// This function is unsafe because it dereferences raw pointers from C structs +#[no_mangle] +pub unsafe extern "C" fn ccxr_process_m2v( + enc_ctx: *mut encoder_ctx, + dec_ctx: *mut lib_cc_decode, + data: *const u8, + length: usize, + sub: *mut cc_subtitle, +) -> usize { + if enc_ctx.is_null() || dec_ctx.is_null() || data.is_null() || sub.is_null() { + // This shouldn't happen + return 0; + } + let mut CcxOptions = Options { + // that's the only thing that's required for now + gui_mode_reports: ccx_options.gui_mode_reports != 0, + ..Default::default() + }; + + let data_slice = std::slice::from_raw_parts(data, length); + process_m2v( + &mut *enc_ctx, + &mut *dec_ctx, + data_slice, + length, + &mut *sub, + &mut CcxOptions, + ) + .unwrap_or(0) +} diff --git a/src/rust/src/es/pic.rs b/src/rust/src/es/pic.rs new file mode 100644 index 000000000..f0cb32aa8 --- /dev/null +++ b/src/rust/src/es/pic.rs @@ -0,0 +1,339 @@ +use crate::bindings::{anchor_hdcc, cc_subtitle, encoder_ctx, lib_cc_decode, process_hdcc}; +use crate::libccxr_exports::time::{ccxr_get_fts, ccxr_print_debug_timing, ccxr_set_fts}; +use crate::{ccx_options, frames_since_ref_time, fts_at_gop_start, total_frames_count}; +use lib_ccxr::common::{BitStreamRust, BitstreamError, FrameType}; +use lib_ccxr::debug; +use lib_ccxr::util::log::DebugMessageFlag; +use lib_ccxr::util::log::ExitCause; +use lib_ccxr::{dbg_es, fatal}; +use std::os::raw::c_int; + +// Return TRUE if the data parsing finished, FALSE otherwise. +// estream->pos is advanced. Data is only processed if esstream->error +// is FALSE, parsing can set esstream->error to TRUE. +fn pic_header( + ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, +) -> Result { + dbg_es!("PIC header"); + + if esstream.error || esstream.bits_left <= 0 { + return Ok(false); + } + + // We only get here after seeing that start code + if esstream.bitstream_get_num(4, true)? != 0x00010000 { + // LSB first (0x00000100) + fatal!(cause = ExitCause::Bug; "In pic_header: read_u32(esstream) != 0x00010000. Please file a bug report on GitHub."); + } + + ctx.temporal_reference = esstream.read_bits(10)? as i32; + ctx.picture_coding_type = esstream.read_bits(3)? as u32; + + if ctx.picture_coding_type == FrameType::IFrame as _ { + unsafe { + // Write I-Frame in ffprobe format for easy comparison + ctx.num_key_frames += 1; + debug!(msg_type = DebugMessageFlag::VIDEO_STREAM; "key_frame=1|pkt_pts={}|pict_type=I", ((*ctx.timing).current_pts)); + } + } + + // Discard vbv_delay + esstream.skip_bits(16)?; + + // Discard some information + if ctx.picture_coding_type == FrameType::PFrame as _ + || ctx.picture_coding_type == FrameType::BFrame as _ + { + esstream.skip_bits(4)?; + } + if ctx.picture_coding_type == FrameType::BFrame as _ { + esstream.skip_bits(4)?; + } + + // extra_information + while esstream.read_bits(1)? == 1 { + esstream.skip_bits(8)?; + } + + if esstream.bits_left < 0 { + return Ok(false); + } + + if !(ctx.picture_coding_type == FrameType::IFrame as _ + || ctx.picture_coding_type == FrameType::PFrame as _ + || ctx.picture_coding_type == FrameType::BFrame as _) + { + if esstream.bits_left >= 0 { + // When bits left, this is wrong + esstream.error = true; + } + + if esstream.error { + dbg_es!("pic_header: syntax problem."); + } + return Ok(false); + } + + Ok(true) +} + +// Return TRUE if the data parsing finished, FALSE otherwise. +// estream->pos is advanced. Data is only processed if esstream->error +// is FALSE, parsing can set esstream->error to TRUE. +fn pic_coding_ext( + ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, +) -> Result { + dbg_es!("Picture coding extension {}", esstream.bits_left); + + if esstream.error || esstream.bits_left <= 0 { + return Ok(false); + } + + // Syntax check + if esstream.next_start_code()? != 0xB5 { + dbg_es!("pic_coding_ext: syntax problem."); + return Ok(false); + } + + esstream.bitstream_get_num(4, true)?; // skip_u32(esstream); // Advance + + // Read extension_start_code_identifier + let extension_id = esstream.read_bits(4)? as u32; + if extension_id != 0x8 { + // Picture Coding Extension ID + if esstream.bits_left >= 0 { + // When bits left, this is wrong + esstream.error = true; + } + + if esstream.error { + dbg_es!("pic_coding_ext: syntax problem."); + } + return Ok(false); + } + + // Discard some information + esstream.skip_bits(4 * 4 + 2)?; + ctx.picture_structure = esstream.read_bits(2)? as u32; + ctx.top_field_first = esstream.read_bits(1)? as u32; + esstream.skip_bits(5)?; + ctx.repeat_first_field = esstream.read_bits(1)? as u32; + esstream.skip_bits(1)?; // chroma + ctx.progressive_frame = esstream.read_bits(1)? as u32; + let composite_display = esstream.read_bits(1)? as u32; + if composite_display != 0 { + esstream.skip_bits(1 + 3 + 1 + 7 + 8)?; + } + + if esstream.bits_left < 0 { + return Ok(false); + } + + dbg_es!("Picture coding extension - processed"); + + // Read complete + Ok(true) +} + +// Return TRUE if all was read. FALSE if a problem occurred: +// If a bitstream syntax problem occurred the bitstream will +// point to after the problem, in case we run out of data the bitstream +// will point to where we want to restart after getting more. +/// # Safety +/// This function is unsafe because it calls C functions like `process_hdcc` or `anchor_hdcc` +pub unsafe fn read_pic_info( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + sub: &mut cc_subtitle, +) -> Result { + dbg_es!("Read PIC Info"); + + // We only get here after seeing that start code + if esstream.bitstream_get_num(4, false)? != 0x00010000 { + // LSB first (0x00000100) + fatal!(cause = ExitCause::Bug; "In read_pic_info: next_u32(esstream) != 0x00010000. Please file a bug report on GitHub."); + } + + // If we get here esstream points to the start of a group_start_code + // should we run out of data in esstream this is where we want to restart + // after getting more. + let pic_info_start_pos = esstream.pos; + let pic_info_start_bpos = esstream.bpos; + + pic_header(dec_ctx, esstream)?; + pic_coding_ext(dec_ctx, esstream)?; + + if esstream.error { + return Ok(false); + } + + if esstream.bits_left < 0 { + esstream.init_bitstream(pic_info_start_pos, pic_info_start_bpos as _)?; + return Ok(false); + } + + // A new anchor frame - flush buffered caption data. Might be flushed + // in GOP header already. + if (dec_ctx.picture_coding_type == FrameType::IFrame as _ + || dec_ctx.picture_coding_type == FrameType::PFrame as _) + && (((dec_ctx.picture_structure != 0x1) && (dec_ctx.picture_structure != 0x2)) + || (dec_ctx.temporal_reference != (*dec_ctx.timing).current_tref)) + { + // NOTE: process_hdcc() needs to be called before set_fts() as it + // uses fts_now to re-create the timeline !!!!! + if dec_ctx.has_ccdata_buffered != 0 { + process_hdcc(enc_ctx, dec_ctx, sub); + } + anchor_hdcc(dec_ctx, dec_ctx.temporal_reference); + } + + (*dec_ctx.timing).current_tref = dec_ctx.temporal_reference; + (*dec_ctx.timing).current_picture_coding_type = dec_ctx.picture_coding_type; + + // We mostly use PTS, but when the GOP mode is enabled do not set + // the FTS time here. + if ccx_options.use_gop_as_pts != 1 { + ccxr_set_fts(dec_ctx.timing); // Initialize fts + } + + // Set min_pts/sync_pts according to the current time stamp. + // Use fts_at_gop_start as reference when a GOP header was seen + // since the last frame 0. If not this is most probably a + // TS without GOP headers but with USER DATA after each picture + // header. Use the current FTS values as reference. + // Note: If a GOP header was present the reference time is from + // the beginning of the GOP, otherwise it is now. + if dec_ctx.temporal_reference == 0 { + dec_ctx.last_gop_length = dec_ctx.maxtref + 1; + dec_ctx.maxtref = dec_ctx.temporal_reference; + + // frames_since_ref_time is used in set_fts() + + if dec_ctx.saw_gop_header != 0 { + // This time (fts_at_gop_start) that was set in the + // GOP header and it might be off by one GOP. See the comment there. + frames_since_ref_time = dec_ctx.frames_since_last_gop; // Should this be 0? + } else { + // No GOP header, use the current values + fts_at_gop_start = ccxr_get_fts(dec_ctx.timing, dec_ctx.current_field); + frames_since_ref_time = 0; + } + + if ccx_options.debug_mask & 4 != 0 { + debug!(msg_type = DebugMessageFlag::TIME; "\nNew temporal reference:"); + ccxr_print_debug_timing(dec_ctx.timing); + } + + dec_ctx.saw_gop_header = 0; // Reset the value + } + + if dec_ctx.saw_gop_header == 0 && dec_ctx.picture_coding_type == FrameType::IFrame as _ { + // A new GOP begins with an I-frame. Lets hope there are + // never more than one per GOP + dec_ctx.frames_since_last_gop = 0; + } + + // Set maxtref + if dec_ctx.temporal_reference > dec_ctx.maxtref { + dec_ctx.maxtref = dec_ctx.temporal_reference; + if dec_ctx.maxtref + 1 > dec_ctx.max_gop_length { + dec_ctx.max_gop_length = dec_ctx.maxtref + 1; + } + } + + let mut extraframe = 0u32; + if dec_ctx.repeat_first_field != 0 { + dec_ctx.pulldownfields += 1; + dec_ctx.total_pulldownfields += 1; + if dec_ctx.current_progressive_sequence != 0 || (dec_ctx.total_pulldownfields % 2) == 0 { + extraframe = 1; + } + if dec_ctx.current_progressive_sequence != 0 && dec_ctx.top_field_first != 0 { + extraframe = 2; + } + debug!( + msg_type = DebugMessageFlag::VIDEO_STREAM; + "Pulldown: total pd fields: {} - {} extra frames", + dec_ctx.total_pulldownfields, + extraframe + ); + } + + dec_ctx.total_pulldownframes += extraframe; + total_frames_count += 1 + extraframe; + dec_ctx.frames_since_last_gop += (1 + extraframe) as c_int; + frames_since_ref_time += (1 + extraframe) as c_int; + + dbg_es!("Read PIC Info - processed\n"); + + Ok(true) +} + +// Return TRUE if all was read. FALSE if a problem occurred: +// If a bitstream syntax problem occurred the bitstream will +// point to after the problem, in case we run out of data the bitstream +// will point to where we want to restart after getting more. +pub fn read_pic_data(esstream: &mut BitStreamRust) -> Result { + dbg_es!("Read PIC Data\n"); + + let startcode = esstream.next_start_code()?; + + // Possibly the last call to this function ended with the last + // bit of the slice? I.e. in_pic_data is still true, but we are + // seeing the next start code. + + // We only get here after seeing that start code + if !(0x01..=0xAF).contains(&startcode) { + dbg_es!("Read Pic Data - processed0\n"); + + return Ok(true); + } + + // If we get here esstream points to the start of a slice_start_code + // should we run out of data in esstream this is where we want to restart + // after getting more. + let mut slice_start_pos = esstream.pos; + let mut slice_start_bpos = esstream.bpos; + + loop { + let startcode = esstream.next_start_code()?; + // Syntax check + if startcode == 0xB4 { + if esstream.bits_left < 0 { + esstream.init_bitstream(slice_start_pos, esstream.data.len())?; + esstream.bpos = slice_start_bpos; + } + + if esstream.error { + dbg_es!("read_pic_data: syntax problem.\n"); + } else { + dbg_es!("read_pic_data: reached end of bitstream.\n"); + } + + return Ok(false); + } + + slice_start_pos = esstream.pos; // No need to come back + slice_start_bpos = esstream.bpos; + + if (0x01..=0xAF).contains(&startcode) { + esstream.skip_bits(32)?; // Advance bitstream + esstream.search_start_code()?; // Skip this slice + } else { + break; + } + } + + if esstream.bits_left < 0 { + esstream.init_bitstream(slice_start_pos, esstream.data.len())?; + esstream.bpos = slice_start_bpos; + return Ok(false); + } + + dbg_es!("Read Pic Data - processed\n"); + + Ok(true) +} diff --git a/src/rust/src/es/seq.rs b/src/rust/src/es/seq.rs new file mode 100644 index 000000000..ddb606d0e --- /dev/null +++ b/src/rust/src/es/seq.rs @@ -0,0 +1,214 @@ +use crate::bindings::lib_cc_decode; +use crate::current_fps; +use lib_ccxr::activity::ActivityExt; +use lib_ccxr::common::{ + BitStreamRust, BitstreamError, Options, ASPECT_RATIO_TYPES, FRAMERATES_TYPES, FRAMERATES_VALUES, +}; +use lib_ccxr::debug; +use lib_ccxr::util::log::{DebugMessageFlag, ExitCause}; +use lib_ccxr::{dbg_es, fatal, info}; + +// Return TRUE if the data parsing finished, FALSE otherwise. +// estream->pos is advanced. Data is only processed if esstream->error +// is FALSE, parsing can set esstream->error to TRUE. +fn sequence_header( + ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + ccx_options: &mut Options, +) -> Result { + dbg_es!("Sequence header"); + + if esstream.error || esstream.bits_left <= 0 { + return Ok(false); + } + + // We only get here after seeing that start code + if esstream.bitstream_get_num(4, true)? != 0xB3010000 { + // LSB first (0x000001B3) + fatal!(cause = ExitCause::Bug; "In sequence_header: read_u32(esstream) != 0xB3010000. Please file a bug report on GitHub."); + } + + let hor_size = esstream.read_bits(12)? as u32; + let vert_size = esstream.read_bits(12)? as u32; + let aspect_ratio = esstream.read_bits(4)? as u32; + let frame_rate = esstream.read_bits(4)? as u32; + + // Discard some information + esstream.read_bits(18 + 1 + 10 + 1)?; + + // load_intra_quantiser_matrix + if esstream.read_bits(1)? != 0 { + esstream.skip_bits(8 * 64)?; + } + // load_non_intra_quantiser_matrix + if esstream.read_bits(1)? != 0 { + esstream.skip_bits(8 * 64)?; + } + + if esstream.bits_left < 0 { + return Ok(false); + } + + // If we got the whole sequence, process + if hor_size != ctx.current_hor_size + || vert_size != ctx.current_vert_size + || aspect_ratio != ctx.current_aspect_ratio + || frame_rate != ctx.current_frame_rate + { + // If horizontal/vertical size, framerate and/or aspect + // ratio are illegal, we discard the + // whole sequence info. + if (288..=1088).contains(&vert_size) && + (352..=1920).contains(&hor_size) && + (hor_size * 100) / vert_size >= (352 * 100) / 576 && // The weird *100 is to avoid using floats + hor_size / vert_size <= 2 && + frame_rate > 0 && frame_rate < 9 && + aspect_ratio > 0 && aspect_ratio < 5 + { + info!("\n\nNew video information found"); + info!("\n"); + info!( + "[{} * {}] [AR: {}] [FR: {}]", + hor_size, + vert_size, + ASPECT_RATIO_TYPES[aspect_ratio as usize], + FRAMERATES_TYPES[frame_rate as usize] + ); + // No newline, force the output of progressive info in picture + // info part. + ctx.current_progressive_sequence = 2; + + ctx.current_hor_size = hor_size; + ctx.current_vert_size = vert_size; + ctx.current_aspect_ratio = aspect_ratio; + ctx.current_frame_rate = frame_rate; + unsafe { + current_fps = FRAMERATES_VALUES[ctx.current_frame_rate as usize]; + } + ccx_options.activity_video_info( + hor_size, + vert_size, + ASPECT_RATIO_TYPES[aspect_ratio as usize], + FRAMERATES_TYPES[frame_rate as usize], + ); + } else { + dbg_es!("\nInvalid sequence header:"); + dbg_es!( + "V: {} H: {} FR: {} AS: {}", + vert_size, + hor_size, + frame_rate, + aspect_ratio + ); + esstream.error = true; + return Ok(false); + } + } + + // Read complete + Ok(true) +} + +// Return TRUE if the data parsing finished, FALSE otherwise. +// estream->pos is advanced. Data is only processed if esstream->error +// is FALSE, parsing can set esstream->error to TRUE. +fn sequence_ext( + ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, +) -> Result { + dbg_es!("Sequence extension"); + + if esstream.error || esstream.bits_left <= 0 { + return Ok(false); + } + + // Syntax check + if esstream.next_start_code()? != 0xB5 { + dbg_es!("sequence_ext: syntax problem."); + return Ok(false); + } + + esstream.bitstream_get_num(4, true)?; // skip_u32(esstream); // Advance + + // Read extension_start_code_identifier + let extension_id = esstream.read_bits(4)? as u32; + if extension_id != 0x1 { + // Sequence Extension ID + if esstream.bits_left >= 0 { + // When bits left, this is wrong + esstream.error = true; + } + + if esstream.error { + dbg_es!("sequence_ext: syntax problem."); + } + return Ok(false); + } + + // Discard some information + esstream.skip_bits(8)?; + let progressive_sequence = esstream.read_bits(1)? as u32; + + if progressive_sequence != ctx.current_progressive_sequence { + ctx.current_progressive_sequence = progressive_sequence; + info!( + " [progressive: {}]\n\n", + if progressive_sequence != 0 { + "yes" + } else { + "no" + } + ); + } + + esstream.skip_bits(2 + 2 + 2 + 12 + 1 + 8 + 1 + 2 + 5)?; + + if esstream.bits_left < 0 { + return Ok(false); + } + + // Read complete + Ok(true) +} +// Return TRUE if all was read. FALSE if a problem occurred: +// If a bitstream syntax problem occurred the bitstream will +// point to after the problem, in case we run out of data the bitstream +// will point to where we want to restart after getting more. +pub fn read_seq_info( + ctx: &mut lib_cc_decode, + esstream: &mut BitStreamRust, + ccx_options: &mut Options, +) -> Result { + dbg_es!("Read Sequence Info"); + + // We only get here after seeing that start code + if esstream.bitstream_get_num(4, false)? != 0xB3010000 { + // LSB first (0x000001B3) + fatal!(cause = ExitCause::Bug; "In read_seq_info: next_u32(esstream) != 0xB3010000. Please file a bug report on GitHub."); + } + + // If we get here esstream points to the start of a sequence_header_code + // should we run out of data in esstream this is where we want to restart + // after getting more. + let video_seq_start_pos = esstream.pos; + let video_seq_start_bpos = esstream.bpos; + + sequence_header(ctx, esstream, ccx_options)?; + sequence_ext(ctx, esstream)?; + // FIXME: if sequence extension is missing this is not MPEG-2, + // or broken. Set bitstream error. + // extension_and_user_data(esstream); + + if esstream.error { + return Ok(false); + } + + if esstream.bits_left < 0 { + esstream.init_bitstream(video_seq_start_pos, video_seq_start_bpos as usize)?; + return Ok(false); + } + + dbg_es!("Read Sequence Info - processed\n"); + + Ok(true) +} diff --git a/src/rust/src/es/userdata.rs b/src/rust/src/es/userdata.rs new file mode 100644 index 000000000..1b6675a6d --- /dev/null +++ b/src/rust/src/es/userdata.rs @@ -0,0 +1,555 @@ +/* Return a pointer to a string that holds the printable characters + * of the caption data block. FOR DEBUG PURPOSES ONLY! */ +use crate::bindings::{cc_subtitle, decode_vbi, do_cb, encoder_ctx, lib_cc_decode, store_hdcc}; +use crate::current_fps; +use crate::es::core::dump; +use lib_ccxr::common::{BitStreamRust, BitstreamError}; +use lib_ccxr::util::log::{DebugMessageFlag, ExitCause}; +use lib_ccxr::{debug, fatal, info}; + +fn debug_608_to_asc(cc_data: &[u8], channel: i32) -> String { + let mut output = " ".to_string(); + + if cc_data.len() < 3 { + return output; + } + + let cc_valid = (cc_data[0] & 4) >> 2; + let cc_type = (cc_data[0] & 3) as i32; + + if cc_valid != 0 && cc_type == channel { + let hi = cc_data[1] & 0x7F; // Get rid of parity bit + let lo = cc_data[2] & 0x7F; // Get rid of parity bit + if hi >= 0x20 { + output = format!("{}{}", hi as char, if lo >= 20 { lo as char } else { '.' }); + } else { + output = "<>".to_string(); + } + } + + output +} + +/// # Safety +/// This function is unsafe because it dereferences raw pointers and calls C functions like `do_cb` +pub unsafe fn user_data( + enc_ctx: &mut encoder_ctx, + dec_ctx: &mut lib_cc_decode, + ustream: &mut BitStreamRust, + udtype: i32, + sub: &mut cc_subtitle, +) -> Result { + debug!(msg_type = DebugMessageFlag::VERBOSE; "user_data({})", udtype); + + // Shall not happen + if ustream.error || ustream.bits_left <= 0 { + // ustream->error=1; + return Ok(0); // Actually discarded on call. + // CFS: Seen in a Wobble edited file. + // fatal(CCX_COMMON_EXIT_BUG_BUG, "user_data: Impossible!"); + } + + // Do something + dec_ctx.stat_numuserheaders += 1; + // header+=4; + + let ud_header = ustream.next_bytes(4)?; + if ustream.error || ustream.bits_left <= 0 { + return Ok(0); // Actually discarded on call. + // CFS: Seen in Stick_VHS.mpg. + // fatal(CCX_COMMON_EXIT_BUG_BUG, "user_data: Impossible!"); + } + + // DVD CC header, see + // + if ud_header.starts_with(&[0x43, 0x43]) { + dec_ctx.stat_dvdccheaders += 1; + + // Probably unneeded, but keep looking for extra caption blocks + let mut maybeextracb = true; + + ustream.read_bytes(4)?; // "43 43 01 F8" + + let pattern_flag = ustream.read_bits(1)? as u8; + ustream.read_bits(1)?; + let mut capcount = ustream.read_bits(5)? as i32; + let truncate_flag = ustream.read_bits(1)? as i32; // truncate_flag - one CB extra + + let mut field1packet = 0; // expect Field 1 first + if pattern_flag == 0x00 { + field1packet = 1; // expect Field 1 second + } + + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading {}{} DVD CC segments", + capcount, if truncate_flag != 0 { "+1" } else { "" }); + + capcount += truncate_flag; + + // This data comes before the first frame header, so + // in order to get the correct timing we need to set the + // current time to one frame after the maximum time of the + // last GOP. Only useful when there are frames before + // the GOP. + if (*dec_ctx.timing).fts_max > 0 { + (*dec_ctx.timing).fts_now = (*dec_ctx.timing).fts_max + (1000.0 / current_fps) as i64; + } + + let mut rcbcount = 0; + for i in 0..capcount { + for j in 0..2 { + let mut data = [0u8; 3]; + data[0] = ustream.bitstream_get_num(1, true)? as u8; + data[1] = ustream.bitstream_get_num(1, true)? as u8; + data[2] = ustream.bitstream_get_num(1, true)? as u8; + + // Obey the truncate flag. + if truncate_flag != 0 && i == capcount - 1 && j == 1 { + maybeextracb = false; + break; + } + /* Field 1 and 2 data can be in either order, + with marker bytes of \xff and \xfe + Since markers can be repeated, use pattern as well */ + if (data[0] & 0xFE) == 0xFE { + // Check if valid + if data[0] == 0xff && j == field1packet { + data[0] = 0x04; // Field 1 + } else { + data[0] = 0x05; // Field 2 + } + do_cb(dec_ctx, data.as_mut_ptr(), sub); + rcbcount += 1; + } else { + debug!(msg_type = DebugMessageFlag::VERBOSE; "Illegal caption segment - stop here."); + maybeextracb = false; + break; + } + } + } + // Theoretically this should not happen, oh well ... + // Deal with extra closed captions some DVD have. + let mut ecbcount = 0; + while maybeextracb && (ustream.bitstream_get_num(1, false)? as u8 & 0xFE) == 0xFE { + for j in 0..2 { + let mut data = [0u8; 3]; + data[0] = ustream.bitstream_get_num(1, true)? as u8; + data[1] = ustream.bitstream_get_num(1, true)? as u8; + data[2] = ustream.bitstream_get_num(1, true)? as u8; + /* Field 1 and 2 data can be in either order, + with marker bytes of \xff and \xfe + Since markers can be repeated, use pattern as well */ + if (data[0] & 0xFE) == 0xFE { + // Check if valid + if data[0] == 0xff && j == field1packet { + data[0] = 0x04; // Field 1 + } else { + data[0] = 0x05; // Field 2 + } + do_cb(dec_ctx, data.as_mut_ptr(), sub); + ecbcount += 1; + } else { + debug!(msg_type = DebugMessageFlag::VERBOSE; "Illegal (extra) caption segment - stop here."); + maybeextracb = false; + break; + } + } + } + + debug!(msg_type = DebugMessageFlag::VERBOSE; "Read {}/{} DVD CC blocks", rcbcount, ecbcount); + } + // SCTE 20 user data + else if dec_ctx.noscte20 == 0 && ud_header[0] == 0x03 { + // reserved - unspecified + if (ud_header[1] & 0x7F) == 0x01 { + let mut cc_data = [0u8; 3 * 31 + 1]; // Maximum cc_count is 31 + + dec_ctx.stat_scte20ccheaders += 1; + ustream.read_bytes(2)?; // "03 01" + + let cc_count = ustream.read_bits(5)? as u32; + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading {} SCTE 20 CC blocks", cc_count); + + for j in 0..cc_count { + ustream.skip_bits(2)?; // priority - unused + let field_number = ustream.read_bits(2)? as u32; + ustream.skip_bits(5)?; // line_offset - unused + let cc_data1 = ustream.read_bits(8)? as u32; + let cc_data2 = ustream.read_bits(8)? as u32; + ustream.read_bits(1)?; // TODO: Add syntax check */ + if ustream.bits_left < 0 { + fatal!(cause = ExitCause::Bug; "In user_data: ustream->bitsleft < 0. Cannot continue."); + } + + // Field_number is either + // 0 .. forbidden + // 1 .. field 1 (odd) + // 2 .. field 2 (even) + // 3 .. repeated, from repeat_first_field, effectively field 1 + if field_number < 1 { + // 0 is invalid + cc_data[(j * 3) as usize] = 0x00; // Set to invalid + cc_data[(j * 3 + 1) as usize] = 0x00; + cc_data[(j * 3 + 2) as usize] = 0x00; + } else { + // Treat field_number 3 as 1 + let mut field_number = (field_number - 1) & 0x01; + // top_field_first also affects to which field the caption + // belongs. + if dec_ctx.top_field_first == 0 { + field_number ^= 0x01; + } + cc_data[(j * 3) as usize] = 0x04 | (field_number as u8); + cc_data[(j * 3 + 1) as usize] = BitStreamRust::reverse8(cc_data1 as u8); + cc_data[(j * 3 + 2) as usize] = BitStreamRust::reverse8(cc_data2 as u8); + } + } + cc_data[(cc_count * 3) as usize] = 0xFF; + store_hdcc( + enc_ctx, + dec_ctx, + cc_data.as_mut_ptr(), + cc_count as _, + (*dec_ctx.timing).current_tref, + (*dec_ctx.timing).fts_now, + sub, + ); + + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading SCTE 20 CC blocks - done"); + } + } + // ReplayTV 4000/5000 caption header - parsing information + // derived from CCExtract.bdl + else if (ud_header[0] == 0xbb // ReplayTV 4000 + || ud_header[0] == 0x99) // ReplayTV 5000 + && ud_header[1] == 0x02 + { + let mut data = [0u8; 3]; + + if ud_header[0] == 0xbb { + dec_ctx.stat_replay4000headers += 1; + } else { + dec_ctx.stat_replay5000headers += 1; + } + + ustream.read_bytes(2)?; // "BB 02" or "99 02" + data[0] = 0x05; // Field 2 + data[1] = ustream.bitstream_get_num(1, true)? as u8; + data[2] = ustream.bitstream_get_num(1, true)? as u8; + do_cb(dec_ctx, data.as_mut_ptr(), sub); + ustream.read_bytes(2)?; // Skip "CC 02" for R4000 or "AA 02" for R5000 + data[0] = 0x04; // Field 1 + data[1] = ustream.bitstream_get_num(1, true)? as u8; + data[2] = ustream.bitstream_get_num(1, true)? as u8; + do_cb(dec_ctx, data.as_mut_ptr(), sub); + } + // HDTV - see A/53 Part 4 (Video) + else if ud_header.starts_with(&[0x47, 0x41, 0x39, 0x34]) { + dec_ctx.stat_hdtv += 1; + + ustream.read_bytes(4)?; // "47 41 39 34" + + let type_code = ustream.bitstream_get_num(1, true)? as u8; + if type_code == 0x03 { + // CC data. + ustream.skip_bits(1)?; // reserved + let process_cc_data = ustream.read_bits(1)? as u8; + ustream.skip_bits(1)?; // additional_data - unused + let cc_count = ustream.read_bits(5)? as u8; + ustream.read_bytes(1)?; // "FF" + if process_cc_data != 0 { + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading {} HDTV CC blocks", cc_count); + + let mut proceed = true; + let cc_data = ustream.read_bytes(cc_count as usize * 3)?; + if ustream.bits_left < 0 { + fatal!(cause = ExitCause::Bug; "In user_data: ustream->bitsleft < 0. Cannot continue."); + } + + // Check for proper marker - This read makes sure that + // cc_count*3+1 bytes are read and available in cc_data. + if ustream.bitstream_get_num(1, true)? as u8 != 0xFF { + proceed = false; + } + + if !proceed { + debug!(msg_type = DebugMessageFlag::VERBOSE; "\rThe following payload is not properly terminated."); + dump(cc_data.to_vec().as_mut_ptr(), (cc_count * 3 + 1) as _, 0, 0); + } + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading {} HD CC blocks", cc_count); + + // B-frames might be (temporal) before or after the anchor + // frame they belong to. Store the buffer until the next anchor + // frame occurs. The buffer will be flushed (sorted) in the + // picture header (or GOP) section when the next anchor occurs. + // Please note we store the current value of the global + // fts_now variable (and not get_fts()) as we are going to + // re-create the timeline in process_hdcc() (Slightly ugly). + store_hdcc( + enc_ctx, + dec_ctx, + cc_data.to_vec().as_mut_ptr(), + cc_count as _, + (*dec_ctx.timing).current_tref, + (*dec_ctx.timing).fts_now, + sub, + ); + + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading HDTV blocks - done"); + } + } + // reserved - additional_cc_data + } + // DVB closed caption header for Dish Network (Field 1 only) */ + else if ud_header.starts_with(&[0x05, 0x02]) { + // Like HDTV (above) Dish Network captions can be stored at each + // frame, but maximal two caption blocks per frame and only one + // field is stored. + // To process this with the HDTV framework we create a "HDTV" caption + // format compatible array. Two times 3 bytes plus one for the 0xFF + // marker at the end. Pre-init to field 1 and set the 0xFF marker. + let mut DISHDATA: [u8; 7] = [0x04, 0, 0, 0x04, 0, 0, 0xFF]; + let mut cc_count: usize; + + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading Dish Network user data"); + + dec_ctx.stat_dishheaders += 1; + + ustream.read_bytes(2)?; // "05 02" + + // The next bytes are like this: + // header[2] : ID: 0x04 (MPEG?), 0x03 (H264?) + // header[3-4]: Two byte counter (counting (sub-)GOPs?) + // header[5-6]: Two bytes, maybe checksum? + // header[7]: Pattern type + // on B-frame: 0x02, 0x04 + // on I-/P-frame: 0x05 + let id = ustream.bitstream_get_num(1, true)? as u8; + let dishcount = ustream.bitstream_get_num(2, true)? as u32; + let something = ustream.bitstream_get_num(2, true)? as u32; + let mut pattern_type = ustream.bitstream_get_num(1, true)? as u8; + debug!(msg_type = DebugMessageFlag::PARSE; "DN ID: {:02X} Count: {:5} Unknown: {:04X} Pattern: {:X}", + id, dishcount, something, pattern_type); + + // The following block needs 4 to 6 bytes starting from the + // current position + let dcd_pos = ustream.pos; // dish caption data position + match pattern_type { + 0x02 => { + // Two byte caption - always on B-frame + // The following 4 bytes are: + // 0 : 0x09 + // 1-2: caption block + // 3 : REPEAT - 02: two bytes + // - 04: four bytes (repeat first two) + let dcd_data = &ustream.data[dcd_pos..dcd_pos + 4]; + debug!(msg_type = DebugMessageFlag::PARSE; "\n02 {:02X} {:02X}:{:02X} - R:{:02X} :", + dcd_data[0], dcd_data[1], dcd_data[2], dcd_data[3]); + + cc_count = 1; + unsafe { + DISHDATA[1] = dcd_data[1]; + DISHDATA[2] = dcd_data[2]; + + debug!(msg_type = DebugMessageFlag::PARSE; "{}", debug_608_to_asc(&DISHDATA, 0)); + + pattern_type = dcd_data[3]; // repeater (0x02 or 0x04) + let hi = DISHDATA[1] & 0x7f; // Get only the 7 low bits + if pattern_type == 0x04 && hi < 32 { + // repeat (only for non-character pairs) + cc_count = 2; + DISHDATA[3] = 0x04; // Field 1 + DISHDATA[4] = DISHDATA[1]; + DISHDATA[5] = DISHDATA[2]; + + debug!(msg_type = DebugMessageFlag::PARSE; "{}:", debug_608_to_asc(&DISHDATA[3..], 0)); + } else { + debug!(msg_type = DebugMessageFlag::PARSE; ":"); + } + + DISHDATA[cc_count * 3] = 0xFF; // Set end marker + + store_hdcc( + enc_ctx, + dec_ctx, + DISHDATA.as_mut_ptr(), + cc_count as _, + (*dec_ctx.timing).current_tref, + (*dec_ctx.timing).fts_now, + sub, + ); + } + + // Ignore 3 (0x0A, followed by two unknown) bytes. + } + 0x04 => { + // Four byte caption - always on B-frame + // The following 5 bytes are: + // 0 : 0x09 + // 1-2: caption block + // 3-4: caption block + let dcd_data = &ustream.data[dcd_pos..dcd_pos + 5]; + debug!(msg_type = DebugMessageFlag::PARSE; "\n04 {:02X} {:02X}:{:02X}:{:02X}:{:02X} :", + dcd_data[0], dcd_data[1], dcd_data[2], dcd_data[3], dcd_data[4]); + + cc_count = 2; + unsafe { + DISHDATA[1] = dcd_data[1]; + DISHDATA[2] = dcd_data[2]; + + DISHDATA[3] = 0x04; // Field 1 + DISHDATA[4] = dcd_data[3]; + DISHDATA[5] = dcd_data[4]; + DISHDATA[6] = 0xFF; // Set end marker + + debug!(msg_type = DebugMessageFlag::PARSE; "{}", debug_608_to_asc(&DISHDATA, 0)); + debug!(msg_type = DebugMessageFlag::PARSE; "{}:", debug_608_to_asc(&DISHDATA[3..], 0)); + + store_hdcc( + enc_ctx, + dec_ctx, + DISHDATA.as_mut_ptr(), + cc_count as _, + (*dec_ctx.timing).current_tref, + (*dec_ctx.timing).fts_now, + sub, + ); + } + + // Ignore 4 (0x020A, followed by two unknown) bytes. + } + 0x05 => { + // Buffered caption - always on I-/P-frame + // The following six bytes are: + // 0 : 0x04 + // - the following are from previous 0x05 caption header - + // 1 : prev dcd[2] + // 2-3: prev dcd[3-4] + // 4-5: prev dcd[5-6] + let dcd_data = &ustream.data[dcd_pos..dcd_pos + 10]; // Need more bytes for this case + debug!(msg_type = DebugMessageFlag::PARSE; " - {:02X} pch: {:02X} {:5} {:02X}:{:02X}", + dcd_data[0], dcd_data[1], + (dcd_data[2] as u32) * 256 + (dcd_data[3] as u32), + dcd_data[4], dcd_data[5]); + + // Now one of the "regular" 0x02 or 0x04 captions follows + debug!(msg_type = DebugMessageFlag::PARSE; "{:02X} {:02X} {:02X}:{:02X}", + dcd_data[6], dcd_data[7], dcd_data[8], dcd_data[9]); + + pattern_type = dcd_data[6]; // Number of caption bytes (0x02 or 0x04) + + cc_count = 1; + unsafe { + DISHDATA[1] = dcd_data[8]; + DISHDATA[2] = dcd_data[9]; + + if pattern_type == 0x02 { + pattern_type = dcd_data[10]; // repeater (0x02 or 0x04) + + debug!(msg_type = DebugMessageFlag::PARSE; " - R:{:02X} :{}", pattern_type, debug_608_to_asc(&DISHDATA, 0)); + + let hi = DISHDATA[1] & 0x7f; // Get only the 7 low bits + if pattern_type == 0x04 && hi < 32 { + cc_count = 2; + DISHDATA[3] = 0x04; // Field 1 + DISHDATA[4] = DISHDATA[1]; + DISHDATA[5] = DISHDATA[2]; + debug!(msg_type = DebugMessageFlag::PARSE; "{}:", debug_608_to_asc(&DISHDATA[3..], 0)); + } else { + debug!(msg_type = DebugMessageFlag::PARSE; ":"); + } + DISHDATA[cc_count * 3] = 0xFF; // Set end marker + } else { + debug!(msg_type = DebugMessageFlag::PARSE; ":{:02X}:{:02X} ", + dcd_data[10], dcd_data[11]); + cc_count = 2; + DISHDATA[3] = 0x04; // Field 1 + DISHDATA[4] = dcd_data[10]; + DISHDATA[5] = dcd_data[11]; + DISHDATA[6] = 0xFF; // Set end marker + + debug!(msg_type = DebugMessageFlag::PARSE; ":{}", debug_608_to_asc(&DISHDATA, 0)); + debug!(msg_type = DebugMessageFlag::PARSE; "{}:", debug_608_to_asc(&DISHDATA[3..], 0)); + } + + store_hdcc( + enc_ctx, + dec_ctx, + DISHDATA.as_mut_ptr(), + cc_count as _, + (*dec_ctx.timing).current_tref, + (*dec_ctx.timing).fts_now, + sub, + ); + } + + // Ignore 3 (0x0A, followed by 2 unknown) bytes. + } + _ => { + // printf ("Unknown?\n"); + } + } // match + + debug!(msg_type = DebugMessageFlag::VERBOSE; "Reading Dish Network user data - done"); + } + // CEA 608 / aka "Divicom standard", see: + // http://www.pixeltools.com/tech_tip_closed_captioning.html + else if ud_header.starts_with(&[0x02, 0x09]) { + // Either a documentation or more examples are needed. + dec_ctx.stat_divicom += 1; + + let mut data = [0u8; 3]; + + ustream.read_bytes(2)?; // "02 09" + ustream.read_bytes(2)?; // "80 80" ??? + ustream.read_bytes(2)?; // "02 0A" ??? + data[0] = 0x04; // Field 1 + data[1] = ustream.bitstream_get_num(1, true)? as u8; + data[2] = ustream.bitstream_get_num(1, true)? as u8; + do_cb(dec_ctx, data.as_mut_ptr(), sub); + // This is probably incomplete! + } + // GXF vbi OEM code + else if ud_header.starts_with(&[0x73, 0x52, 0x21, 0x06]) { + let udatalen = ustream.data.len() - ustream.pos; + ustream.read_bytes(4)?; // skip header code + ustream.read_bytes(2)?; // skip data length + let line_nb = ustream.read_bits(16)? as u16; + let line_type = ustream.bitstream_get_num(1, true)? as u8; + let field = line_type & 0x03; + if field == 0 { + info!("MPEG:VBI: Invalid field"); + } + + let line_type = line_type >> 2; + if line_type != 1 { + info!("MPEG:VBI: only support Luma line"); + } + + if udatalen < 720 { + info!("MPEG:VBI: Minimum 720 bytes in luma line required"); + } + + let vbi_data = &ustream.data[ustream.pos..ustream.pos + 720]; + decode_vbi(dec_ctx, field, vbi_data.to_vec().as_mut_ptr(), 720, sub); + debug!(msg_type = DebugMessageFlag::VERBOSE; "GXF (vbi line {}) user data:", line_nb); + } else { + // Some other user data + // 06 02 ... Seems to be DirectTV + debug!(msg_type = DebugMessageFlag::VERBOSE; "Unrecognized user data:"); + let udatalen = ustream.data.len() - ustream.pos; + let dump_len = if udatalen > 128 { 128 } else { udatalen }; + dump( + ustream.data[ustream.pos..ustream.pos + dump_len] + .to_vec() + .as_mut_ptr(), + dump_len as _, + 0, + 0, + ); + } + + debug!(msg_type = DebugMessageFlag::VERBOSE; "User data - processed"); + + // Read complete + Ok(1) +} diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 218ff59f6..81be67c03 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -17,6 +17,7 @@ pub mod args; pub mod common; pub mod decoder; pub mod encoder; +pub mod es; #[cfg(feature = "hardsubx_ocr")] pub mod hardsubx; pub mod libccxr_exports; @@ -133,7 +134,7 @@ extern "C" fn ccxr_process_cc_data( if !validate_cc_pair(cc_block) { continue; } - let success = do_cb(dec_ctx, &mut dtvcc, cc_block); + let success = do_cb_dtvcc(dec_ctx, &mut dtvcc, cc_block); if success { ret = 0; } @@ -177,7 +178,7 @@ pub fn verify_parity(data: u8) -> bool { } /// Process CC data according to its type -pub fn do_cb(ctx: &mut lib_cc_decode, dtvcc: &mut Dtvcc, cc_block: &[u8]) -> bool { +pub fn do_cb_dtvcc(ctx: &mut lib_cc_decode, dtvcc: &mut Dtvcc, cc_block: &[u8]) -> bool { let cc_valid = (cc_block[0] & 4) >> 2; let cc_type = cc_block[0] & 3; let mut timeok = true; @@ -360,7 +361,7 @@ mod test { let mut decoder_ctx = lib_cc_decode::default(); let cc_block = [0x97, 0x1F, 0x3C]; - assert!(do_cb(&mut decoder_ctx, &mut dtvcc, &cc_block)); + assert!(do_cb_dtvcc(&mut decoder_ctx, &mut dtvcc, &cc_block)); assert_eq!(decoder_ctx.current_field, 3); assert_eq!(decoder_ctx.cc_stats[3], 1); assert_eq!(decoder_ctx.processed_enough, 0); diff --git a/src/rust/src/libccxr_exports/time.rs b/src/rust/src/libccxr_exports/time.rs index 58bf0aecc..fd22d1e9a 100644 --- a/src/rust/src/libccxr_exports/time.rs +++ b/src/rust/src/libccxr_exports/time.rs @@ -325,7 +325,9 @@ unsafe fn write_back_from_timing_info() { } /// Construct a [`GopTimeCode`] from `gop_time_code`. -unsafe fn generate_gop_time_code(g: gop_time_code) -> Option { +/// # Safety +/// This function is unsafe because it calls `from_raw_parts` which is unsafe. +pub unsafe fn generate_gop_time_code(g: gop_time_code) -> Option { if g.inited == 0 { None } else { @@ -341,7 +343,9 @@ unsafe fn generate_gop_time_code(g: gop_time_code) -> Option { } /// Construct a `gop_time_code` from [`GopTimeCode`]. -unsafe fn write_gop_time_code(g: Option) -> gop_time_code { +/// # Safety +/// This function is unsafe because it calls `as_raw_parts` which is unsafe. +pub unsafe fn write_gop_time_code(g: Option) -> gop_time_code { if let Some(gop) = g { let ( drop_frame, From 0a9c07100de911949cdec5219a321eff7318de88 Mon Sep 17 00:00:00 2001 From: steel-bucket Date: Sat, 23 Aug 2025 20:21:00 +0530 Subject: [PATCH 2/5] Windows Failing CI --- src/rust/Cargo.lock | 55 +- src/rust/Cargo.toml | 4 +- src/rust/src/es/gop.rs | 8 +- src/rust/src/es/pic.rs | 5 +- src/rust/src/es/userdata.rs | 3 +- src/rust/src/lib.rs | 783 +++++++++++++++------------- windows/ccextractor.vcxproj | 16 +- windows/ccextractor.vcxproj.filters | 15 +- 8 files changed, 474 insertions(+), 415 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index d9b3d3ec0..10d66389c 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -103,7 +103,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 1.0.109", "which", @@ -111,25 +111,22 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.69.5" +version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ "bitflags 2.9.0", "cexpr", "clang-sys", "itertools", - "lazy_static", - "lazycell", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn 2.0.99", - "which", ] [[package]] @@ -606,7 +603,7 @@ dependencies = [ "num_enum", "strum 0.26.3", "strum_macros 0.26.4", - "thiserror", + "thiserror 1.0.69", "time", "url", ] @@ -898,14 +895,13 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rsmpeg" -version = "0.14.2+ffmpeg.6.1" +version = "0.15.2+ffmpeg.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927012cd6ae43519f519741f4a69602ce3a47cf84750784da124dffd03527cc0" +checksum = "affc0df87c9691b97b25d3df4fd0ba6a037a0ebc27f37f6a6f0e5682443cf9a9" dependencies = [ - "libc", "paste", "rusty_ffmpeg", - "thiserror", + "thiserror 2.0.16", ] [[package]] @@ -914,6 +910,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -944,13 +946,12 @@ checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "rusty_ffmpeg" -version = "0.13.3+ffmpeg.6.1" +version = "0.16.5+ffmpeg.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "716adffa5f909c8533611b1dab9ab5666bece35687845865b75ed6a990fc239c" +checksum = "0b371deca3d03b0b82510dccb72ad4d54d37a1f638e298f29da87063d4d525e5" dependencies = [ - "bindgen 0.69.5", + "bindgen 0.71.1", "camino", - "libc", "once_cell", "pkg-config", "vcpkg", @@ -1110,7 +1111,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +dependencies = [ + "thiserror-impl 2.0.16", ] [[package]] @@ -1124,6 +1134,17 @@ dependencies = [ "syn 2.0.99", ] +[[package]] +name = "thiserror-impl" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.99", +] + [[package]] name = "time" version = "0.3.39" diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 7a0701195..dde25d136 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -14,8 +14,8 @@ crate-type = ["staticlib"] log = "0.4.26" env_logger = "0.8.4" palette = "0.6.1" -rsmpeg = { version = "0.14.2", optional = true, features = [ - "link_system_ffmpeg", +rsmpeg = { version = "0.15.2", optional = true, features = [ + "link_vcpkg_ffmpeg", ] } tesseract-sys = { version = "0.5.15", optional = true, default-features = false } leptonica-sys = { version = "= 0.4.6", optional = true, default-features = false } diff --git a/src/rust/src/es/gop.rs b/src/rust/src/es/gop.rs index b754f1e4b..c1ceb3e7b 100644 --- a/src/rust/src/es/gop.rs +++ b/src/rust/src/es/gop.rs @@ -1,8 +1,9 @@ -use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode, process_hdcc}; +use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode}; use crate::libccxr_exports::time::{ ccxr_get_fts_max, ccxr_print_debug_timing, ccxr_set_current_pts, ccxr_set_fts, write_gop_time_code, }; +use crate::process_hdcc; use crate::{ ccx_options, current_fps, first_gop_time, frames_since_ref_time, fts_at_gop_start, gop_time, total_frames_count, MPEG_CLOCK_FREQ, @@ -185,7 +186,7 @@ unsafe fn gop_header( if ccx_options.use_gop_as_pts == 1 { ccxr_set_current_pts( dec_ctx.timing, - gtc.timestamp.millis() * (MPEG_CLOCK_FREQ as i64 / 1000), + (gtc.timestamp.millis() as i32 * (MPEG_CLOCK_FREQ as i32 / 1000)) as _, ); (*dec_ctx.timing).current_tref = 0; frames_since_ref_time = 0; @@ -197,7 +198,8 @@ unsafe fn gop_header( // next GOP. // This effect will also lead to captions being one GOP early // for DVD captions. - fts_at_gop_start = ccxr_get_fts_max(dec_ctx.timing) + (1000.0 / current_fps) as i64; + fts_at_gop_start = + (ccxr_get_fts_max(dec_ctx.timing) as i32 + (1000.0 / current_fps) as i32) as _; } if ccx_options.debug_mask & 4 != 0 { diff --git a/src/rust/src/es/pic.rs b/src/rust/src/es/pic.rs index f0cb32aa8..0fdb90be6 100644 --- a/src/rust/src/es/pic.rs +++ b/src/rust/src/es/pic.rs @@ -1,5 +1,6 @@ -use crate::bindings::{anchor_hdcc, cc_subtitle, encoder_ctx, lib_cc_decode, process_hdcc}; +use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode}; use crate::libccxr_exports::time::{ccxr_get_fts, ccxr_print_debug_timing, ccxr_set_fts}; +use crate::{anchor_hdcc, process_hdcc}; use crate::{ccx_options, frames_since_ref_time, fts_at_gop_start, total_frames_count}; use lib_ccxr::common::{BitStreamRust, BitstreamError, FrameType}; use lib_ccxr::debug; @@ -28,7 +29,7 @@ fn pic_header( } ctx.temporal_reference = esstream.read_bits(10)? as i32; - ctx.picture_coding_type = esstream.read_bits(3)? as u32; + ctx.picture_coding_type = esstream.read_bits(3)? as _; if ctx.picture_coding_type == FrameType::IFrame as _ { unsafe { diff --git a/src/rust/src/es/userdata.rs b/src/rust/src/es/userdata.rs index 1b6675a6d..a5c0f09d7 100644 --- a/src/rust/src/es/userdata.rs +++ b/src/rust/src/es/userdata.rs @@ -1,8 +1,9 @@ /* Return a pointer to a string that holds the printable characters * of the caption data block. FOR DEBUG PURPOSES ONLY! */ -use crate::bindings::{cc_subtitle, decode_vbi, do_cb, encoder_ctx, lib_cc_decode, store_hdcc}; +use crate::bindings::{cc_subtitle, encoder_ctx, lib_cc_decode}; use crate::current_fps; use crate::es::core::dump; +use crate::{decode_vbi, do_cb, store_hdcc}; use lib_ccxr::common::{BitStreamRust, BitstreamError}; use lib_ccxr::util::log::{DebugMessageFlag, ExitCause}; use lib_ccxr::{debug, fatal, info}; diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs index 81be67c03..7e4c4de15 100644 --- a/src/rust/src/lib.rs +++ b/src/rust/src/lib.rs @@ -1,370 +1,413 @@ -//! Rust library for CCExtractor -//! -//! Currently we are in the process of porting the 708 decoder to rust. See [decoder] - -// Allow C naming style -#![allow(non_upper_case_globals)] -#![allow(non_camel_case_types)] -#![allow(non_snake_case)] - -/// CCExtractor C bindings generated by bindgen -#[allow(clippy::all)] -pub mod bindings { - include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -} - -pub mod args; -pub mod common; -pub mod decoder; -pub mod encoder; -pub mod es; -#[cfg(feature = "hardsubx_ocr")] -pub mod hardsubx; -pub mod libccxr_exports; -pub mod parser; -pub mod utils; - -#[cfg(windows)] -use std::os::windows::io::{FromRawHandle, RawHandle}; - -use args::Args; -use bindings::*; -use cfg_if::cfg_if; -use clap::{error::ErrorKind, Parser}; -use common::{copy_from_rust, CType, CType2}; -use decoder::Dtvcc; -use lib_ccxr::{common::Options, teletext::TeletextConfig, util::log::ExitCause}; -use parser::OptionsExt; -use utils::is_true; - -use env_logger::{builder, Target}; -use log::{warn, LevelFilter}; -use std::{ - ffi::CStr, - io::Write, - os::raw::{c_char, c_double, c_int, c_long, c_uint}, -}; - -// Mock data for rust unit tests -cfg_if! { - if #[cfg(test)] { - static mut cb_708: c_int = 0; - static mut cb_field1: c_int = 0; - static mut cb_field2: c_int = 0; - static mut current_fps: c_double = 30.0; - static mut usercolor_rgb: [c_int; 8] = [0; 8]; - static mut FILEBUFFERSIZE: c_int = 0; - static mut MPEG_CLOCK_FREQ: c_int = 90000; - - static mut frames_since_ref_time: c_int = 0; - static mut total_frames_count: c_uint = 0; - static mut fts_at_gop_start: c_long = 0; - static mut gop_rollover: c_int = 0; - static mut pts_big_change: c_uint = 0; - - static mut tlt_config: ccx_s_teletext_config = unsafe { std::mem::zeroed() }; - static mut ccx_options: ccx_s_options = unsafe { std::mem::zeroed() }; - static mut gop_time: gop_time_code = unsafe { std::mem::zeroed() }; - static mut first_gop_time: gop_time_code = unsafe { std::mem::zeroed() }; - static mut ccx_common_timing_settings: ccx_common_timing_settings_t = unsafe { std::mem::zeroed() }; - static mut capitalization_list: word_list = unsafe { std::mem::zeroed() }; - static mut profane: word_list = unsafe { std::mem::zeroed() }; - - unsafe extern "C" fn version(_location: *const c_char) {} - unsafe extern "C" fn set_binary_mode() {} - } -} - -// External C symbols (only when not testing) -#[cfg(not(test))] -extern "C" { - static mut cb_708: c_int; - static mut cb_field1: c_int; - static mut cb_field2: c_int; - static mut current_fps: c_double; - static mut usercolor_rgb: [c_int; 8]; - static mut FILEBUFFERSIZE: c_int; - static mut MPEG_CLOCK_FREQ: c_int; - static mut tlt_config: ccx_s_teletext_config; - static mut ccx_options: ccx_s_options; - static mut frames_since_ref_time: c_int; - static mut total_frames_count: c_uint; - static mut gop_time: gop_time_code; - static mut first_gop_time: gop_time_code; - static mut fts_at_gop_start: c_long; - static mut gop_rollover: c_int; - static mut ccx_common_timing_settings: ccx_common_timing_settings_t; - static mut capitalization_list: word_list; - static mut profane: word_list; - static mut pts_big_change: c_uint; - - fn version(location: *const c_char); - fn set_binary_mode(); -} - -/// Initialize env logger with custom format, using stdout as target -#[no_mangle] -pub extern "C" fn ccxr_init_logger() { - builder() - .format(|buf, record| writeln!(buf, "[CEA-708] {}", record.args())) - .filter_level(LevelFilter::Debug) - .target(Target::Stdout) - .init(); -} - -/// Process cc_data -/// -/// # Safety -/// dec_ctx should not be a null pointer -/// data should point to cc_data of length cc_count -#[no_mangle] -extern "C" fn ccxr_process_cc_data( - dec_ctx: *mut lib_cc_decode, - data: *const ::std::os::raw::c_uchar, - cc_count: c_int, -) -> c_int { - let mut ret = -1; - let mut cc_data: Vec = (0..cc_count * 3) - .map(|x| unsafe { *data.add(x as usize) }) - .collect(); - let dec_ctx = unsafe { &mut *dec_ctx }; - let dtvcc_ctx = unsafe { &mut *dec_ctx.dtvcc }; - let mut dtvcc = Dtvcc::new(dtvcc_ctx); - for cc_block in cc_data.chunks_exact_mut(3) { - if !validate_cc_pair(cc_block) { - continue; - } - let success = do_cb_dtvcc(dec_ctx, &mut dtvcc, cc_block); - if success { - ret = 0; - } - } - ret -} - -/// Returns `true` if cc_block pair is valid -/// -/// For CEA-708 data, only cc_valid is checked -/// For CEA-608 data, parity is also checked -pub fn validate_cc_pair(cc_block: &mut [u8]) -> bool { - let cc_valid = (cc_block[0] & 4) >> 2; - let cc_type = cc_block[0] & 3; - if cc_valid == 0 { - return false; - } - if cc_type == 0 || cc_type == 1 { - // For CEA-608 data we verify parity. - if verify_parity(cc_block[2]) { - // If the second byte doesn't pass parity, ignore pair - return false; - } - if verify_parity(cc_block[1]) { - // If the first byte doesn't pass parity, - // we replace it with a solid blank and process the pair. - cc_block[1] = 0x7F; - } - } - true -} - -/// Returns `true` if data has odd parity -/// -/// CC uses odd parity (i.e., # of 1's in byte is odd.) -pub fn verify_parity(data: u8) -> bool { - if data.count_ones() & 1 == 1 { - return true; - } - false -} - -/// Process CC data according to its type -pub fn do_cb_dtvcc(ctx: &mut lib_cc_decode, dtvcc: &mut Dtvcc, cc_block: &[u8]) -> bool { - let cc_valid = (cc_block[0] & 4) >> 2; - let cc_type = cc_block[0] & 3; - let mut timeok = true; - - if ctx.write_format != ccx_output_format::CCX_OF_DVDRAW - && ctx.write_format != ccx_output_format::CCX_OF_RAW - && (cc_block[0] == 0xFA || cc_block[0] == 0xFC || cc_block[0] == 0xFD) - && (cc_block[1] & 0x7F) == 0 - && (cc_block[2] & 0x7F) == 0 - { - return true; - } - - if cc_valid == 1 || cc_type == 3 { - ctx.cc_stats[cc_type as usize] += 1; - match cc_type { - // Type 0 and 1 are for CEA-608 data. Handled by C code, do nothing - 0 | 1 => {} - // Type 2 and 3 are for CEA-708 data. - 2 | 3 => { - let current_time = if ctx.timing.is_null() { - 0 - } else { - unsafe { (*ctx.timing).get_fts(ctx.current_field as u8) } - }; - ctx.current_field = 3; - - // Check whether current time is within start and end bounds - if is_true(ctx.extraction_start.set) - && current_time < ctx.extraction_start.time_in_ms - { - timeok = false; - } - if is_true(ctx.extraction_end.set) && current_time > ctx.extraction_end.time_in_ms { - timeok = false; - ctx.processed_enough = 1; - } - - if timeok && ctx.write_format != ccx_output_format::CCX_OF_RAW { - dtvcc.process_cc_data(cc_valid, cc_type, cc_block[1], cc_block[2]); - } - unsafe { cb_708 += 1 } - } - _ => warn!("Invalid cc_type"), - } - } - true -} - -#[cfg(windows)] -#[no_mangle] -extern "C" fn ccxr_close_handle(handle: RawHandle) { - use std::fs::File; - - if handle.is_null() { - return; - } - unsafe { - // File will close automatically (due to Drop) once it goes out of scope - let _file = File::from_raw_handle(handle); - } -} - -/// # Safety -/// Safe if argv is a valid pointer -/// -/// Parse parameters from argv and argc -#[no_mangle] -pub unsafe extern "C" fn ccxr_parse_parameters(argc: c_int, argv: *mut *mut c_char) -> c_int { - // Convert argv to Vec and pass it to parse_parameters - let args = std::slice::from_raw_parts(argv, argc as usize) - .iter() - .map(|&arg| { - CStr::from_ptr(arg) - .to_str() - .expect("Invalid UTF-8 sequence in argument") - .to_owned() - }) - .collect::>(); - - if args.len() <= 1 { - return ExitCause::NoInputFiles.exit_code(); - } - - let args: Args = match Args::try_parse_from(args) { - Ok(args) => args, - Err(e) => { - // Not all errors are actual errors, some are just help or version - // So handle them accordingly - match e.kind() { - ErrorKind::DisplayHelp => { - // Print the help string - println!("{e}"); - return ExitCause::WithHelp.exit_code(); - } - ErrorKind::DisplayVersion => { - version(*argv); - return ExitCause::WithHelp.exit_code(); - } - ErrorKind::UnknownArgument => { - println!("Unknown Argument"); - println!("{e}"); - return ExitCause::MalformedParameter.exit_code(); - } - _ => { - println!("{e}"); - return ExitCause::Failure.exit_code(); - } - } - } - }; - - let mut _capitalization_list: Vec = Vec::new(); - let mut _profane: Vec = Vec::new(); - - let mut opt = Options::default(); - let mut _tlt_config = TeletextConfig::default(); - - opt.parse_parameters( - &args, - &mut _tlt_config, - &mut _capitalization_list, - &mut _profane, - ); - tlt_config = _tlt_config.to_ctype(&opt); - - // Convert the rust struct (CcxOptions) to C struct (ccx_s_options), so that it can be used by the C code - copy_from_rust(&raw mut ccx_options, opt); - - if !_capitalization_list.is_empty() { - capitalization_list = _capitalization_list.to_ctype(); - } - if !_profane.is_empty() { - profane = _profane.to_ctype(); - } - - ExitCause::Ok.exit_code() -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_verify_parity() { - // Odd parity - assert!(verify_parity(0b1010001)); - - // Even parity - assert!(!verify_parity(0b1000001)); - } - - #[test] - fn test_validate_cc_pair() { - // Valid CEA-708 data - let mut cc_block = [0x97, 0x1F, 0x3C]; - assert!(validate_cc_pair(&mut cc_block)); - - // Invalid CEA-708 data - let mut cc_block = [0x93, 0x1F, 0x3C]; - assert!(!validate_cc_pair(&mut cc_block)); - - // Valid CEA-608 data - let mut cc_block = [0x15, 0x2F, 0x7D]; - assert!(validate_cc_pair(&mut cc_block)); - // Check for replaced bit when 1st byte doesn't pass parity - assert_eq!(cc_block[1], 0x7F); - - // Invalid CEA-608 data - let mut cc_block = [0x15, 0x2F, 0x5E]; - assert!(!validate_cc_pair(&mut cc_block)); - } - - #[test] - fn test_do_cb() { - let mut dtvcc_ctx = crate::decoder::test::initialize_dtvcc_ctx(); - - let mut dtvcc = Dtvcc::new(&mut dtvcc_ctx); - - let mut decoder_ctx = lib_cc_decode::default(); - let cc_block = [0x97, 0x1F, 0x3C]; - - assert!(do_cb_dtvcc(&mut decoder_ctx, &mut dtvcc, &cc_block)); - assert_eq!(decoder_ctx.current_field, 3); - assert_eq!(decoder_ctx.cc_stats[3], 1); - assert_eq!(decoder_ctx.processed_enough, 0); - assert_eq!(unsafe { cb_708 }, 11); - } -} +//! Rust library for CCExtractor +//! +//! Currently we are in the process of porting the 708 decoder to rust. See [decoder] + +// Allow C naming style +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +/// CCExtractor C bindings generated by bindgen +#[allow(clippy::all)] +pub mod bindings { + include!(concat!(env!("OUT_DIR"), "/bindings.rs")); +} + +pub mod args; +pub mod common; +pub mod decoder; +pub mod encoder; +pub mod es; +#[cfg(feature = "hardsubx_ocr")] +pub mod hardsubx; +pub mod libccxr_exports; +pub mod parser; +pub mod utils; + +#[cfg(windows)] +use std::os::windows::io::{FromRawHandle, RawHandle}; + +use args::Args; +use bindings::*; +use cfg_if::cfg_if; +use clap::{error::ErrorKind, Parser}; +use common::{copy_from_rust, CType, CType2}; +use decoder::Dtvcc; +use lib_ccxr::{common::Options, teletext::TeletextConfig, util::log::ExitCause}; +use parser::OptionsExt; +use utils::is_true; + +use env_logger::{builder, Target}; +use log::{warn, LevelFilter}; +use std::os::raw::c_uchar; +use std::{ + ffi::CStr, + io::Write, + os::raw::{c_char, c_double, c_int, c_long, c_uint}, +}; + +// Mock data for rust unit tests +cfg_if! { + if #[cfg(test)] { + static mut cb_708: c_int = 0; + static mut cb_field1: c_int = 0; + static mut cb_field2: c_int = 0; + static mut current_fps: c_double = 30.0; + static mut usercolor_rgb: [c_int; 8] = [0; 8]; + static mut FILEBUFFERSIZE: c_int = 0; + static mut MPEG_CLOCK_FREQ: c_int = 90000; + + static mut frames_since_ref_time: c_int = 0; + static mut total_frames_count: c_uint = 0; + static mut fts_at_gop_start: c_long = 0; + static mut gop_rollover: c_int = 0; + static mut pts_big_change: c_uint = 0; + + static mut tlt_config: ccx_s_teletext_config = unsafe { std::mem::zeroed() }; + static mut ccx_options: ccx_s_options = unsafe { std::mem::zeroed() }; + static mut gop_time: gop_time_code = unsafe { std::mem::zeroed() }; + static mut first_gop_time: gop_time_code = unsafe { std::mem::zeroed() }; + static mut ccx_common_timing_settings: ccx_common_timing_settings_t = unsafe { std::mem::zeroed() }; + static mut capitalization_list: word_list = unsafe { std::mem::zeroed() }; + static mut profane: word_list = unsafe { std::mem::zeroed() }; + + unsafe extern "C" fn version(_location: *const c_char) {} + unsafe extern "C" fn set_binary_mode() {} + fn process_hdcc(enc_ctx: *mut encoder_ctx, ctx: *mut lib_cc_decode, sub: *mut cc_subtitle){} + fn store_hdcc( + enc_ctx: *mut encoder_ctx, + ctx: *mut lib_cc_decode, + cc_data: *mut c_uchar, + cc_count: c_int, + sequence_number: c_int, + current_fts_now: LLONG, + sub: *mut cc_subtitle, + ){} + fn anchor_hdcc(ctx: *mut lib_cc_decode, seq: c_int){} + fn do_cb( + ctx: *mut lib_cc_decode, + cc_block: *mut c_uchar, + sub: *mut cc_subtitle, + ) -> c_int{0} + fn decode_vbi( + dec_ctx: *mut lib_cc_decode, + field: u8, + buffer: *mut c_uchar, + len: usize, + sub: *mut cc_subtitle, + ) -> c_int{0} + } +} + +// External C symbols (only when not testing) +#[cfg(not(test))] +extern "C" { + static mut cb_708: c_int; + static mut cb_field1: c_int; + static mut cb_field2: c_int; + static mut current_fps: c_double; + static mut usercolor_rgb: [c_int; 8]; + static mut FILEBUFFERSIZE: c_int; + static mut MPEG_CLOCK_FREQ: c_int; + static mut tlt_config: ccx_s_teletext_config; + static mut ccx_options: ccx_s_options; + static mut frames_since_ref_time: c_int; + static mut total_frames_count: c_uint; + static mut gop_time: gop_time_code; + static mut first_gop_time: gop_time_code; + static mut fts_at_gop_start: c_long; + static mut gop_rollover: c_int; + static mut ccx_common_timing_settings: ccx_common_timing_settings_t; + static mut capitalization_list: word_list; + static mut profane: word_list; + static mut pts_big_change: c_uint; + + fn version(location: *const c_char); + fn set_binary_mode(); + fn process_hdcc(enc_ctx: *mut encoder_ctx, ctx: *mut lib_cc_decode, sub: *mut cc_subtitle); + fn store_hdcc( + enc_ctx: *mut encoder_ctx, + ctx: *mut lib_cc_decode, + cc_data: *mut c_uchar, + cc_count: c_int, + sequence_number: c_int, + current_fts_now: LLONG, + sub: *mut cc_subtitle, + ); + fn anchor_hdcc(ctx: *mut lib_cc_decode, seq: c_int); + fn do_cb(ctx: *mut lib_cc_decode, cc_block: *mut c_uchar, sub: *mut cc_subtitle) -> c_int; + fn decode_vbi( + dec_ctx: *mut lib_cc_decode, + field: u8, + buffer: *mut c_uchar, + len: usize, + sub: *mut cc_subtitle, + ) -> c_int; +} + +/// Initialize env logger with custom format, using stdout as target +#[no_mangle] +pub extern "C" fn ccxr_init_logger() { + builder() + .format(|buf, record| writeln!(buf, "[CEA-708] {}", record.args())) + .filter_level(LevelFilter::Debug) + .target(Target::Stdout) + .init(); +} + +/// Process cc_data +/// +/// # Safety +/// dec_ctx should not be a null pointer +/// data should point to cc_data of length cc_count +#[no_mangle] +extern "C" fn ccxr_process_cc_data( + dec_ctx: *mut lib_cc_decode, + data: *const ::std::os::raw::c_uchar, + cc_count: c_int, +) -> c_int { + let mut ret = -1; + let mut cc_data: Vec = (0..cc_count * 3) + .map(|x| unsafe { *data.add(x as usize) }) + .collect(); + let dec_ctx = unsafe { &mut *dec_ctx }; + let dtvcc_ctx = unsafe { &mut *dec_ctx.dtvcc }; + let mut dtvcc = Dtvcc::new(dtvcc_ctx); + for cc_block in cc_data.chunks_exact_mut(3) { + if !validate_cc_pair(cc_block) { + continue; + } + let success = do_cb_dtvcc(dec_ctx, &mut dtvcc, cc_block); + if success { + ret = 0; + } + } + ret +} + +/// Returns `true` if cc_block pair is valid +/// +/// For CEA-708 data, only cc_valid is checked +/// For CEA-608 data, parity is also checked +pub fn validate_cc_pair(cc_block: &mut [u8]) -> bool { + let cc_valid = (cc_block[0] & 4) >> 2; + let cc_type = cc_block[0] & 3; + if cc_valid == 0 { + return false; + } + if cc_type == 0 || cc_type == 1 { + // For CEA-608 data we verify parity. + if verify_parity(cc_block[2]) { + // If the second byte doesn't pass parity, ignore pair + return false; + } + if verify_parity(cc_block[1]) { + // If the first byte doesn't pass parity, + // we replace it with a solid blank and process the pair. + cc_block[1] = 0x7F; + } + } + true +} + +/// Returns `true` if data has odd parity +/// +/// CC uses odd parity (i.e., # of 1's in byte is odd.) +pub fn verify_parity(data: u8) -> bool { + if data.count_ones() & 1 == 1 { + return true; + } + false +} + +/// Process CC data according to its type +pub fn do_cb_dtvcc(ctx: &mut lib_cc_decode, dtvcc: &mut Dtvcc, cc_block: &[u8]) -> bool { + let cc_valid = (cc_block[0] & 4) >> 2; + let cc_type = cc_block[0] & 3; + let mut timeok = true; + + if ctx.write_format != ccx_output_format::CCX_OF_DVDRAW + && ctx.write_format != ccx_output_format::CCX_OF_RAW + && (cc_block[0] == 0xFA || cc_block[0] == 0xFC || cc_block[0] == 0xFD) + && (cc_block[1] & 0x7F) == 0 + && (cc_block[2] & 0x7F) == 0 + { + return true; + } + + if cc_valid == 1 || cc_type == 3 { + ctx.cc_stats[cc_type as usize] += 1; + match cc_type { + // Type 0 and 1 are for CEA-608 data. Handled by C code, do nothing + 0 | 1 => {} + // Type 2 and 3 are for CEA-708 data. + 2 | 3 => { + let current_time = if ctx.timing.is_null() { + 0 + } else { + unsafe { (*ctx.timing).get_fts(ctx.current_field as u8) } + }; + ctx.current_field = 3; + + // Check whether current time is within start and end bounds + if is_true(ctx.extraction_start.set) + && current_time < ctx.extraction_start.time_in_ms + { + timeok = false; + } + if is_true(ctx.extraction_end.set) && current_time > ctx.extraction_end.time_in_ms { + timeok = false; + ctx.processed_enough = 1; + } + + if timeok && ctx.write_format != ccx_output_format::CCX_OF_RAW { + dtvcc.process_cc_data(cc_valid, cc_type, cc_block[1], cc_block[2]); + } + unsafe { cb_708 += 1 } + } + _ => warn!("Invalid cc_type"), + } + } + true +} + +#[cfg(windows)] +#[no_mangle] +extern "C" fn ccxr_close_handle(handle: RawHandle) { + use std::fs::File; + + if handle.is_null() { + return; + } + unsafe { + // File will close automatically (due to Drop) once it goes out of scope + let _file = File::from_raw_handle(handle); + } +} + +/// # Safety +/// Safe if argv is a valid pointer +/// +/// Parse parameters from argv and argc +#[no_mangle] +pub unsafe extern "C" fn ccxr_parse_parameters(argc: c_int, argv: *mut *mut c_char) -> c_int { + // Convert argv to Vec and pass it to parse_parameters + let args = std::slice::from_raw_parts(argv, argc as usize) + .iter() + .map(|&arg| { + CStr::from_ptr(arg) + .to_str() + .expect("Invalid UTF-8 sequence in argument") + .to_owned() + }) + .collect::>(); + + if args.len() <= 1 { + return ExitCause::NoInputFiles.exit_code(); + } + + let args: Args = match Args::try_parse_from(args) { + Ok(args) => args, + Err(e) => { + // Not all errors are actual errors, some are just help or version + // So handle them accordingly + match e.kind() { + ErrorKind::DisplayHelp => { + // Print the help string + println!("{e}"); + return ExitCause::WithHelp.exit_code(); + } + ErrorKind::DisplayVersion => { + version(*argv); + return ExitCause::WithHelp.exit_code(); + } + ErrorKind::UnknownArgument => { + println!("Unknown Argument"); + println!("{e}"); + return ExitCause::MalformedParameter.exit_code(); + } + _ => { + println!("{e}"); + return ExitCause::Failure.exit_code(); + } + } + } + }; + + let mut _capitalization_list: Vec = Vec::new(); + let mut _profane: Vec = Vec::new(); + + let mut opt = Options::default(); + let mut _tlt_config = TeletextConfig::default(); + + opt.parse_parameters( + &args, + &mut _tlt_config, + &mut _capitalization_list, + &mut _profane, + ); + tlt_config = _tlt_config.to_ctype(&opt); + + // Convert the rust struct (CcxOptions) to C struct (ccx_s_options), so that it can be used by the C code + copy_from_rust(&raw mut ccx_options, opt); + + if !_capitalization_list.is_empty() { + capitalization_list = _capitalization_list.to_ctype(); + } + if !_profane.is_empty() { + profane = _profane.to_ctype(); + } + + ExitCause::Ok.exit_code() +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_verify_parity() { + // Odd parity + assert!(verify_parity(0b1010001)); + + // Even parity + assert!(!verify_parity(0b1000001)); + } + + #[test] + fn test_validate_cc_pair() { + // Valid CEA-708 data + let mut cc_block = [0x97, 0x1F, 0x3C]; + assert!(validate_cc_pair(&mut cc_block)); + + // Invalid CEA-708 data + let mut cc_block = [0x93, 0x1F, 0x3C]; + assert!(!validate_cc_pair(&mut cc_block)); + + // Valid CEA-608 data + let mut cc_block = [0x15, 0x2F, 0x7D]; + assert!(validate_cc_pair(&mut cc_block)); + // Check for replaced bit when 1st byte doesn't pass parity + assert_eq!(cc_block[1], 0x7F); + + // Invalid CEA-608 data + let mut cc_block = [0x15, 0x2F, 0x5E]; + assert!(!validate_cc_pair(&mut cc_block)); + } + + #[test] + fn test_do_cb() { + let mut dtvcc_ctx = crate::decoder::test::initialize_dtvcc_ctx(); + + let mut dtvcc = Dtvcc::new(&mut dtvcc_ctx); + + let mut decoder_ctx = lib_cc_decode::default(); + let cc_block = [0x97, 0x1F, 0x3C]; + + assert!(do_cb_dtvcc(&mut decoder_ctx, &mut dtvcc, &cc_block)); + assert_eq!(decoder_ctx.current_field, 3); + assert_eq!(decoder_ctx.cc_stats[3], 1); + assert_eq!(decoder_ctx.processed_enough, 0); + assert_eq!(unsafe { cb_708 }, 11); + } +} diff --git a/windows/ccextractor.vcxproj b/windows/ccextractor.vcxproj index 296793c4e..9380302a9 100644 --- a/windows/ccextractor.vcxproj +++ b/windows/ccextractor.vcxproj @@ -1,6 +1,5 @@  - + Debug-Full @@ -202,8 +201,7 @@ Application v143 - + Application v143 @@ -211,14 +209,10 @@ - + - + @@ -254,7 +248,7 @@ "C:\Program Files\GPAC\sdk\include";%(AdditionalIncludeDirectories) SEGMENT_BY_FILE_TIME;ENABLE_HARDSUBX;FT2_BUILD_LIBRARY;GPAC_DISABLE_VTT;GPAC_DISABLE_OD_DUMP;ENABLE_OCR;WIN32;_DEBUG;_CONSOLE;_FILE_OFFSET_BITS=64;GPAC_DISABLE_REMOTERY;GPAC_DISABLE_ZLIB;%(PreprocessorDefinitions) - EnableFastChecks + Default MultiThreadedDebug diff --git a/windows/ccextractor.vcxproj.filters b/windows/ccextractor.vcxproj.filters index b7afa5a48..4dcc71ed6 100644 --- a/windows/ccextractor.vcxproj.filters +++ b/windows/ccextractor.vcxproj.filters @@ -258,17 +258,14 @@ Header Files + + Header Files + + + Header Files + - - Source Files\ccx_decoders - - - Source Files\ccx_decoders - - - Source Files\ccx_decoders - Source Files From 030339adc7e0544c175b9374ab8ba0f1cef909ce Mon Sep 17 00:00:00 2001 From: Deepnarayan Sett Date: Sat, 23 Aug 2025 20:24:13 +0530 Subject: [PATCH 3/5] ES module: Clippy changes --- src/rust/src/es/gop.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rust/src/es/gop.rs b/src/rust/src/es/gop.rs index c1ceb3e7b..c2bad8f0f 100644 --- a/src/rust/src/es/gop.rs +++ b/src/rust/src/es/gop.rs @@ -186,7 +186,7 @@ unsafe fn gop_header( if ccx_options.use_gop_as_pts == 1 { ccxr_set_current_pts( dec_ctx.timing, - (gtc.timestamp.millis() as i32 * (MPEG_CLOCK_FREQ as i32 / 1000)) as _, + (gtc.timestamp.millis() as i32 * (MPEG_CLOCK_FREQ / 1000)) as _, ); (*dec_ctx.timing).current_tref = 0; frames_since_ref_time = 0; From 6dab3996084f231c43ea92df0d726fad8c6f5faa Mon Sep 17 00:00:00 2001 From: Deepnarayan Sett Date: Sat, 23 Aug 2025 20:30:19 +0530 Subject: [PATCH 4/5] ES module: Cmake failing CI --- src/rust/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index dde25d136..7a0701195 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -14,8 +14,8 @@ crate-type = ["staticlib"] log = "0.4.26" env_logger = "0.8.4" palette = "0.6.1" -rsmpeg = { version = "0.15.2", optional = true, features = [ - "link_vcpkg_ffmpeg", +rsmpeg = { version = "0.14.2", optional = true, features = [ + "link_system_ffmpeg", ] } tesseract-sys = { version = "0.5.15", optional = true, default-features = false } leptonica-sys = { version = "= 0.4.6", optional = true, default-features = false } From b7f2c6944da7ea3ee9f9a16dbd2d26bd49434f30 Mon Sep 17 00:00:00 2001 From: Deepnarayan Sett Date: Sat, 23 Aug 2025 20:31:09 +0530 Subject: [PATCH 5/5] ES module: Cmake failing CI --- src/rust/Cargo.lock | 55 ++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 10d66389c..d9b3d3ec0 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -103,7 +103,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash 1.1.0", + "rustc-hash", "shlex", "syn 1.0.109", "which", @@ -111,22 +111,25 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.71.1" +version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "bitflags 2.9.0", "cexpr", "clang-sys", "itertools", + "lazy_static", + "lazycell", "log", "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash", "shlex", "syn 2.0.99", + "which", ] [[package]] @@ -603,7 +606,7 @@ dependencies = [ "num_enum", "strum 0.26.3", "strum_macros 0.26.4", - "thiserror 1.0.69", + "thiserror", "time", "url", ] @@ -895,13 +898,14 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rsmpeg" -version = "0.15.2+ffmpeg.7.1" +version = "0.14.2+ffmpeg.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "affc0df87c9691b97b25d3df4fd0ba6a037a0ebc27f37f6a6f0e5682443cf9a9" +checksum = "927012cd6ae43519f519741f4a69602ce3a47cf84750784da124dffd03527cc0" dependencies = [ + "libc", "paste", "rusty_ffmpeg", - "thiserror 2.0.16", + "thiserror", ] [[package]] @@ -910,12 +914,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - [[package]] name = "rustc_version" version = "0.4.1" @@ -946,12 +944,13 @@ checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "rusty_ffmpeg" -version = "0.16.5+ffmpeg.7.1" +version = "0.13.3+ffmpeg.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b371deca3d03b0b82510dccb72ad4d54d37a1f638e298f29da87063d4d525e5" +checksum = "716adffa5f909c8533611b1dab9ab5666bece35687845865b75ed6a990fc239c" dependencies = [ - "bindgen 0.71.1", + "bindgen 0.69.5", "camino", + "libc", "once_cell", "pkg-config", "vcpkg", @@ -1111,16 +1110,7 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl 1.0.69", -] - -[[package]] -name = "thiserror" -version = "2.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" -dependencies = [ - "thiserror-impl 2.0.16", + "thiserror-impl", ] [[package]] @@ -1134,17 +1124,6 @@ dependencies = [ "syn 2.0.99", ] -[[package]] -name = "thiserror-impl" -version = "2.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.99", -] - [[package]] name = "time" version = "0.3.39"