From 873ffcd88a7105d784be4e7237ac99870b834db8 Mon Sep 17 00:00:00 2001
From: Audionut
Date: Sun, 14 Sep 2025 22:34:44 +1000
Subject: [PATCH 1/2] HDB - hide copied comparisons

---
 src/bbcode.py       | 35 +++++++++++++++++++++++++++++++++++
 src/trackers/HDB.py |  2 +-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/bbcode.py b/src/bbcode.py
index 042734505..84e5ed578 100644
--- a/src/bbcode.py
+++ b/src/bbcode.py
@@ -557,6 +557,41 @@ def convert_code_to_quote(self, desc):
         desc = desc.replace('[/code]', '[/quote]')
         return desc
 
+    def convert_comparison_to_hide(self, desc):
+        comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+        for comp in comparisons:
+            # Extract sources and count them
+            comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
+            comp_sources = re.split(r"\s*,\s*", comp_sources)
+            num_sources = len(comp_sources)
+
+            # Extract all image URLs
+            comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
+            comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg))", comp_content, flags=re.IGNORECASE)
+
+            # Arrange images in groups matching the number of sources
+            arranged_images = []
+            for i in range(0, len(comp_images), num_sources):
+                group = comp_images[i:i + num_sources]
+                if len(group) == num_sources:
+                    arranged_images.extend(group)
+
+            # Format the images as comma-separated groups
+            formatted_images = []
+            for i in range(0, len(arranged_images), num_sources):
+                group = arranged_images[i:i + num_sources]
+                formatted_images.append(', '.join(group))
+
+            # Join all groups with newlines
+            final_images = '\n'.join(formatted_images)
+
+            # Create the hide tag
+            sources_label = ' vs '.join(comp_sources)
+            new_bbcode = f"[hide={sources_label}]{final_images}[/hide]"
+            desc = desc.replace(comp, new_bbcode)
+
+        return desc
+
     def convert_comparison_to_collapse(self, desc, max_width):
         comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
         for comp in comparisons:
diff --git a/src/trackers/HDB.py b/src/trackers/HDB.py
index eea00fd28..5edcf2b98 100644
--- a/src/trackers/HDB.py
+++ b/src/trackers/HDB.py
@@ -538,7 +538,7 @@ async def edit_desc(self, meta):
         desc = desc.replace("[ol]", "").replace("[/ol]", "")
         desc = desc.replace("[*]", "* ")
         desc = bbcode.convert_spoiler_to_hide(desc)
-        desc = bbcode.convert_comparison_to_centered(desc, 1000)
+        desc = bbcode.convert_comparison_to_hide(desc)
         desc = re.sub(r"(\[img=\d+)]", "[img]", desc, flags=re.IGNORECASE)
         desc = re.sub(r"\[/size\]|\[size=\d+\]", "", desc, flags=re.IGNORECASE)
         descfile.write(desc)
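For illustration, a minimal sketch of the transformation this first patch performs, assuming the BBCODE class that hosts these helpers (the import path and the example URLs are hypothetical):

    from src.bbcode import BBCODE  # class name assumed from the surrounding methods

    bbcode = BBCODE()
    desc = (
        "[comparison=Source A, Source B]\n"
        "https://example.com/a1.png https://example.com/b1.png\n"
        "https://example.com/a2.png https://example.com/b2.png\n"
        "[/comparison]"
    )
    # With two sources, the four URLs pair up one comma-separated group per line,
    # wrapped in a labelled hide tag:
    # [hide=Source A vs Source B]https://example.com/a1.png, https://example.com/b1.png
    # https://example.com/a2.png, https://example.com/b2.png[/hide]
    print(bbcode.convert_comparison_to_hide(desc))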
From e953c698c092d2f74b5736a0175d359fa39106f8 Mon Sep 17 00:00:00 2001
From: Audionut
Date: Mon, 15 Sep 2025 22:20:13 +1000
Subject: [PATCH 2/2] rehost the images

---
 src/bbcode.py       |  64 ++++++++++++++----
 src/trackermeta.py  | 145 ++++++++++++++++++++++++++++++++++++++++
 src/trackers/HDB.py | 156 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 351 insertions(+), 14 deletions(-)

diff --git a/src/bbcode.py b/src/bbcode.py
index 84e5ed578..41f2c045a 100644
--- a/src/bbcode.py
+++ b/src/bbcode.py
@@ -557,33 +557,69 @@ def convert_code_to_quote(self, desc):
         desc = desc.replace('[/code]', '[/quote]')
         return desc
 
-    def convert_comparison_to_hide(self, desc):
+    def extract_comparison_images(self, desc):
+        comparison_images = {}
         comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+
         for comp in comparisons:
             # Extract sources and count them
             comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
             comp_sources = re.split(r"\s*,\s*", comp_sources)
             num_sources = len(comp_sources)
-
-            # Extract all image URLs
+            sources_label = ' vs '.join(comp_sources)
             comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
-            comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg))", comp_content, flags=re.IGNORECASE)
+            comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg|jpeg|gif|webp))", comp_content, flags=re.IGNORECASE)
 
-            # Arrange images in groups matching the number of sources
-            arranged_images = []
+            # Organize images into groups matching the number of sources
+            image_groups = []
             for i in range(0, len(comp_images), num_sources):
                 group = comp_images[i:i + num_sources]
                 if len(group) == num_sources:
-                    arranged_images.extend(group)
+                    image_groups.append(group)
 
-            # Format the images as comma-separated groups
-            formatted_images = []
-            for i in range(0, len(arranged_images), num_sources):
-                group = arranged_images[i:i + num_sources]
-                formatted_images.append(', '.join(group))
+            if image_groups:
+                comparison_images[sources_label] = image_groups
+
+        return comparison_images
+
+    def convert_comparison_to_hide(self, desc):
+        comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+        for comp in comparisons:
+            # Extract sources and count them
+            comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
+            comp_sources = re.split(r"\s*,\s*", comp_sources)
+            num_sources = len(comp_sources)
+
+            comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
 
-            # Join all groups with newlines
-            final_images = '\n'.join(formatted_images)
+            if '[url=' in comp_content and '[img]' in comp_content:
+                # Content has BBCode tags - extract them directly
+                bbcode_matches = re.findall(r'\[url=.*?\]\[img\].*?\[/img\]\[/url\]', comp_content)
+                formatted_images = []
+                for i in range(0, len(bbcode_matches), num_sources):
+                    group = bbcode_matches[i:i + num_sources]
+                    if len(group) == num_sources:
+                        formatted_images.append(', '.join(group))
+
+                final_images = '\n'.join(formatted_images)
+            else:
+                # Content has plain URLs
+                comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg))", comp_content, flags=re.IGNORECASE)
+
+                # Arrange images in groups matching the number of sources
+                arranged_images = []
+                for i in range(0, len(comp_images), num_sources):
+                    group = comp_images[i:i + num_sources]
+                    if len(group) == num_sources:
+                        arranged_images.extend(group)
+
+                # Format the images as comma-separated groups
+                formatted_images = []
+                for i in range(0, len(arranged_images), num_sources):
+                    group = arranged_images[i:i + num_sources]
+                    formatted_images.append(', '.join(group))
+
+                final_images = '\n'.join(formatted_images)
 
             # Create the hide tag
             sources_label = ' vs '.join(comp_sources)
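For reference, a sketch of the mapping extract_comparison_images builds for the same hypothetical two-source block: one label keyed to lists of URL groups, one group per screenshot set. Trailing groups with fewer images than sources are dropped by the len(group) == num_sources check.

    comparison_images = {
        "Source A vs Source B": [
            ["https://example.com/a1.png", "https://example.com/b1.png"],
            ["https://example.com/a2.png", "https://example.com/b2.png"],
        ]
    }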
diff --git a/src/trackermeta.py b/src/trackermeta.py
index b6f536873..12b7b69ab 100644
--- a/src/trackermeta.py
+++ b/src/trackermeta.py
@@ -172,6 +172,151 @@ async def bounded_check(image_dict):
     return valid_images
 
 
+async def download_comparison_images(comparison_images, meta):
+    if not comparison_images:
+        return {}
+
+    save_directory = f"{meta['base_dir']}/tmp/{meta['uuid']}/comparisons"
+    os.makedirs(save_directory, exist_ok=True)
+
+    timeout = aiohttp.ClientTimeout(total=30, connect=10, sock_connect=10, sock_read=10)
+    downloaded_comparisons = {}
+
+    semaphore = asyncio.Semaphore(2)
+
+    async def download_image_with_semaphore(url, filepath, skip_existing=True):
+        # Check if file already exists and is valid
+        if skip_existing and os.path.exists(filepath):
+            try:
+                if os.path.getsize(filepath) > 1024:  # At least 1KB
+                    from PIL import Image
+                    with Image.open(filepath) as img:
+                        img.verify()
+                    print(f"\r{' ' * 80}\rSkipping existing image: {os.path.basename(filepath)}", end="", flush=True)
+                    return filepath
+            except Exception:
+                # If file is corrupted, delete it and re-download
+                print(f"\r{' ' * 80}\rExisting file corrupted, re-downloading: {os.path.basename(filepath)}", end="", flush=True)
+                try:
+                    os.remove(filepath)
+                except Exception:
+                    pass
+
+        async with semaphore:
+            try:
+                async with aiohttp.ClientSession(timeout=timeout) as session:
+                    async with session.get(url) as response:
+                        if response.status == 200:
+                            image_content = await response.read()
+                            with open(filepath, "wb") as f:
+                                f.write(image_content)
+                            print(f"\r{' ' * 80}\rDownloaded comparison image: {os.path.basename(filepath)}", end="", flush=True)
+                            # Add 500ms delay after successful download
+                            await asyncio.sleep(0.5)
+                            return filepath
+                        else:
+                            console.print(f"[red]Failed to download comparison image {url}. Status: {response.status}")
+                            return None
+            except Exception as e:
+                console.print(f"[red]Error downloading comparison image {url}: {e}")
+                return None
+
+    failed_downloads = []
+
+    for comp_label, image_groups in comparison_images.items():
+        console.print(f"\n[cyan]Downloading comparison images for: {comp_label}")
+
+        safe_label = "".join(c for c in comp_label if c.isalnum() or c in (' ', '-', '_')).rstrip()
+        safe_label = safe_label.replace(' ', '_')
+        comp_dir = os.path.join(save_directory, safe_label)
+        os.makedirs(comp_dir, exist_ok=True)
+
+        downloaded_groups = []
+        download_tasks = []
+        task_info = []  # Track which task belongs to which group/image
+
+        for group_idx, image_group in enumerate(image_groups):
+            for img_idx, img_url in enumerate(image_group):
+                img_extension = os.path.splitext(img_url)[1] or '.jpg'
+                filename = f"group_{group_idx:03d}_img_{img_idx:02d}{img_extension}"
+                filepath = os.path.join(comp_dir, filename)
+
+                task = download_image_with_semaphore(img_url, filepath)
+                download_tasks.append(task)
+                task_info.append((group_idx, img_idx, img_url, filepath))
+
+        # Execute all download tasks concurrently (but limited by semaphore)
+        if download_tasks:
+            results = await asyncio.gather(*download_tasks, return_exceptions=True)
+
+            group_results = {}
+            for i, result in enumerate(results):
+                group_idx, img_idx, img_url, filepath = task_info[i]
+
+                if isinstance(result, Exception):
+                    console.print(f"[red]Download task failed with exception: {result}")
+                    failed_downloads.append((img_url, filepath))
+                    continue
+
+                if result:
+                    if group_idx not in group_results:
+                        group_results[group_idx] = {}
+                    group_results[group_idx][img_idx] = result
+                else:
+                    failed_downloads.append((img_url, filepath))
+
+            for group_idx in sorted(group_results.keys()):
+                downloaded_group = []
+                for img_idx in sorted(group_results[group_idx].keys()):
+                    downloaded_group.append(group_results[group_idx][img_idx])
+                if downloaded_group:
+                    downloaded_groups.append(downloaded_group)
+
+        if downloaded_groups:
+            downloaded_comparisons[comp_label] = downloaded_groups
+
+    print(f"\r{' ' * 80}\r", end="", flush=True)
+
+    # Retry failed downloads once
+    if failed_downloads:
+        console.print(f"[yellow]Retrying {len(failed_downloads)} failed downloads...")
+        retry_tasks = []
+        retry_info = []
+
+        for img_url, filepath in failed_downloads:
+            task = download_image_with_semaphore(img_url, filepath, skip_existing=False)
+            retry_tasks.append(task)
+            retry_info.append((img_url, filepath))
+
+        if retry_tasks:
+            retry_results = await asyncio.gather(*retry_tasks, return_exceptions=True)
+
+            successful_retries = 0
+            for i, result in enumerate(retry_results):
+                img_url, filepath = retry_info[i]
+
+                if not isinstance(result, Exception) and result:
+                    successful_retries += 1
+                    print(f"\r{' ' * 80}\rRetry successful: {os.path.basename(filepath)}", end="", flush=True)
+
+                    for comp_label, groups in downloaded_comparisons.items():
+                        comp_dir = os.path.dirname(filepath)
+                        if comp_label.replace(' ', '_').replace(' vs ', '_vs_') in comp_dir:
+                            if not groups:
+                                groups.append([result])
+                            else:
+                                groups[-1].append(result)
+                            break
+                else:
+                    console.print(f"[red]Retry failed for: {os.path.basename(filepath)}")
+
+            if successful_retries > 0:
+                print(f"\r{' ' * 80}\rSuccessfully retried {successful_retries} out of {len(failed_downloads)} failed downloads")
+
+    print("")
+
+    return downloaded_comparisons
+
+
 async def check_image_link(url, timeout=None):
     # Handle when pixhost url points to web_url and convert to raw_url
     if url.startswith("https://pixhost.to/show/"):
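Fed that mapping, the downloader writes each comparison into its own sanitized subdirectory and returns the same structure with local paths in place of URLs. Roughly, for the hypothetical block above (base_dir and uuid values invented):

    downloaded = await download_comparison_images(comparison_images, meta)
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_000_img_00.png
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_000_img_01.png
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_001_img_00.png
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_001_img_01.png
    # downloaded mirrors the input dict, with URLs replaced by these local paths.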
diff --git a/src/trackers/HDB.py b/src/trackers/HDB.py
index 5edcf2b98..c465ba139 100644
--- a/src/trackers/HDB.py
+++ b/src/trackers/HDB.py
@@ -14,6 +14,7 @@
 from datetime import datetime
 from torf import Torrent
 from src.torrentcreate import CustomTorrent, torf_cb, create_torrent
+from src.trackermeta import download_comparison_images
 
 
 class HDB():
@@ -538,6 +539,17 @@ async def edit_desc(self, meta):
         desc = desc.replace("[ol]", "").replace("[/ol]", "")
         desc = desc.replace("[*]", "* ")
         desc = bbcode.convert_spoiler_to_hide(desc)
+
+        comparison_images = bbcode.extract_comparison_images(desc)
+        if comparison_images:
+            console.print(f"[cyan]Found {len(comparison_images)} comparison sections to rehost")
+
+            downloaded_comparisons = await download_comparison_images(comparison_images, meta)
+            if downloaded_comparisons:
+                # Rehost the downloaded comparison images
+                rehosted_comparisons = await self.rehost_comparison_images(downloaded_comparisons, meta)
+                desc = await self.replace_comparison_images_in_desc(desc, comparison_images, rehosted_comparisons)
+
         desc = bbcode.convert_comparison_to_hide(desc)
         desc = re.sub(r"(\[img=\d+)]", "[img]", desc, flags=re.IGNORECASE)
         desc = re.sub(r"\[/size\]|\[size=\d+\]", "", desc, flags=re.IGNORECASE)
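Note the ordering in edit_desc: extract_comparison_images reads the original [comparison] blocks, replace_comparison_images_in_desc swaps the plain URLs inside those blocks for rehosted [url=...][img]...[/img][/url] BBCode, and only then does convert_comparison_to_hide collapse the blocks into [hide] tags, so the hide conversion takes its BBCode-tag branch rather than the plain-URL branch. A sketch of the intermediate state (rehosted URLs invented for illustration):

    [comparison=Source A, Source B][url=https://img.hdbits.org/p1][img]https://img.hdbits.org/t1.png[/img][/url] [url=https://img.hdbits.org/p2][img]https://img.hdbits.org/t2.png[/img][/url]
    ...[/comparison]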
@@ -846,3 +858,147 @@ async def search_filename(self, search_term, search_file_folder, meta):
             console.print('[yellow]Could not find a matching release on HDB[/yellow]')
 
         return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_description, hdb_id
+
+    async def rehost_comparison_images(self, downloaded_comparisons, meta):
+        rehosted_comparisons = {}
+
+        for comp_label, image_groups in downloaded_comparisons.items():
+            console.print(f"[green]Rehosting comparison images for: {comp_label}")
+            all_image_paths = []
+            group_structure = []  # Track which images belong to which group
+
+            for group_idx, image_group in enumerate(image_groups):
+                group_start = len(all_image_paths)
+                all_image_paths.extend(image_group)
+                group_structure.append((group_start, len(image_group)))
+
+            if all_image_paths:
+                bbcode_result = await self.upload_comparison_batch_to_hdb(all_image_paths, meta, comp_label)
+                if bbcode_result:
+                    bbcode_matches = re.findall(r'\[url=.*?\]\[img\].*?\[/img\]\[/url\]', bbcode_result)
+                    if bbcode_matches:
+                        num_sources = len(image_groups[0]) if image_groups else 4
+                        formatted_bbcode = ""
+                        for i in range(0, len(bbcode_matches), num_sources):
+                            line = " ".join(bbcode_matches[i:i+num_sources])
+                            if i + num_sources < len(bbcode_matches):
+                                formatted_bbcode += line + "\n"
+                            else:
+                                formatted_bbcode += line
+
+                        rehosted_comparisons[comp_label] = formatted_bbcode
+                        console.print(f"[green]Successfully rehosted {len(all_image_paths)} images for: {comp_label}")
+                    else:
+                        console.print(f"[red]No BBCode matches found in upload result for: {comp_label}")
+                else:
+                    console.print(f"[red]Failed to rehost images for: {comp_label}")
+
+        return rehosted_comparisons
+
+    async def upload_comparison_batch_to_hdb(self, image_paths, meta, comp_label):
+        # Split into smaller batches to avoid 413 Payload Too Large error
+        max_batch_size = 10
+        all_bbcode_results = []
+
+        for batch_start in range(0, len(image_paths), max_batch_size):
+            batch_end = min(batch_start + max_batch_size, len(image_paths))
+            batch_paths = image_paths[batch_start:batch_end]
+            batch_num = (batch_start // max_batch_size) + 1
+            total_batches = (len(image_paths) + max_batch_size - 1) // max_batch_size
+
+            if meta.get('debug'):
+                console.print(f"[cyan]Uploading batch {batch_num}/{total_batches} ({len(batch_paths)} images) for: {comp_label}")
+
+            bbcode_result = await self.upload_single_batch_to_hdb(batch_paths, meta, comp_label, batch_num)
+            if bbcode_result:
+                all_bbcode_results.append(bbcode_result)
+            else:
+                console.print(f"[red]Failed to upload batch {batch_num} for: {comp_label}")
+
+        if all_bbcode_results:
+            combined_bbcode = "\n".join(all_bbcode_results)
+            console.print(f"[green]Successfully uploaded all batches for: {comp_label}")
+            return combined_bbcode
+        else:
+            console.print(f"[red]All upload batches failed for: {comp_label}")
+            return None
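A quick worked example of the batch split above, assuming 24 images for one comparison and the max_batch_size of 10:

    total_batches = (24 + 10 - 1) // 10  # -> 3
    # batch 1 -> images[0:10], batch 2 -> images[10:20], batch 3 -> images[20:24]

Each batch goes out as a single multipart POST, and the per-batch BBCode results are joined with newlines before rehost_comparison_images regroups them by source count.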
+    async def upload_single_batch_to_hdb(self, image_paths, meta, comp_label, batch_num, retry_attempt=0):
+        max_retries = 2
+        timeout_codes = [504, 524, 408, 502, 503]  # timeout/server error codes
+
+        try:
+            url = "https://img.hdbits.org/upload_api.php"
+            data = {
+                'username': self.username,
+                'passkey': self.passkey,
+                'galleryoption': '1',
+                'galleryname': f"{meta['name']} - {comp_label} - Batch {batch_num}",
+                'thumbsize': 'w100'
+            }
+
+            files = {}
+            for i, image_path in enumerate(image_paths):
+                try:
+                    filename = os.path.basename(image_path)
+                    files[f'images_files[{i}]'] = (filename, open(image_path, 'rb'), 'image/png')
+                except Exception as e:
+                    console.print(f"[red]Failed to open {image_path}: {e}")
+                    continue
+
+            if not files:
+                console.print(f"[red]No files to upload in batch {batch_num}")
+                return None
+
+            response = requests.post(url, data=data, files=files, timeout=120)
+
+            if response.status_code == 200:
+                if meta.get('debug'):
+                    console.print(f"[green]Batch {batch_num} upload successful ({len(files)} images)")
+                return response.text
+            elif response.status_code in timeout_codes and retry_attempt < max_retries:
+                console.print(f"[yellow]Batch {batch_num} failed with {response.status_code}, retrying ({retry_attempt + 1}/{max_retries})...")
+                # Close current files before retry
+                for f in files.values():
+                    if hasattr(f, '__len__') and len(f) > 1:
+                        f[1].close()
+                # Wait a bit before retry
+                await asyncio.sleep(5)
+                return await self.upload_single_batch_to_hdb(image_paths, meta, comp_label, batch_num, retry_attempt + 1)
+            else:
+                console.print(f"[red]Batch {batch_num} upload failed with status code {response.status_code}")
+                return None
+
+        except requests.exceptions.Timeout:
+            if retry_attempt < max_retries:
+                console.print(f"[yellow]Batch {batch_num} timed out, retrying ({retry_attempt + 1}/{max_retries})...")
+                await asyncio.sleep(5)
+                return await self.upload_single_batch_to_hdb(image_paths, meta, comp_label, batch_num, retry_attempt + 1)
+            else:
+                console.print(f"[red]Batch {batch_num} failed after {max_retries} timeout retries")
+                return None
+        except requests.RequestException as e:
+            console.print(f"[red]HTTP Request failed for batch {batch_num}: {e}")
+            return None
+        finally:
+            # Close files to prevent resource leaks
+            for f in files.values():
+                if hasattr(f, '__len__') and len(f) > 1:
+                    f[1].close()
+
+    async def replace_comparison_images_in_desc(self, desc, original_comparisons, rehosted_comparisons):
+        comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+
+        for comp in comparisons:
+            comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
+            comp_sources = re.split(r"\s*,\s*", comp_sources)
+            sources_label = ' vs '.join(comp_sources)
+
+            if sources_label in rehosted_comparisons:
+                rehosted_bbcode = rehosted_comparisons[sources_label]
+                if rehosted_bbcode:
+                    new_comp = f"[comparison={', '.join(comp_sources)}]{rehosted_bbcode}[/comparison]"
+                    desc = desc.replace(comp, new_comp)
+                    console.print(f"[green]Replaced comparison block for: {sources_label}")
+
+        return desc
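Putting the pieces together, a sketch of what rehost_comparison_images hands to replace_comparison_images_in_desc for the hypothetical two-source comparison (gallery and thumbnail URLs invented for illustration):

    rehosted_comparisons = {
        "Source A vs Source B":
            "[url=https://img.hdbits.org/p1][img]https://img.hdbits.org/t1.png[/img][/url] "
            "[url=https://img.hdbits.org/p2][img]https://img.hdbits.org/t2.png[/img][/url]\n"
            "[url=https://img.hdbits.org/p3][img]https://img.hdbits.org/t3.png[/img][/url] "
            "[url=https://img.hdbits.org/p4][img]https://img.hdbits.org/t4.png[/img][/url]"
    }

The label must match the ' vs '-joined source list exactly, since replace_comparison_images_in_desc keys its substitution on sources_label.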