From 873ffcd88a7105d784be4e7237ac99870b834db8 Mon Sep 17 00:00:00 2001
From: Audionut
Date: Sun, 14 Sep 2025 22:34:44 +1000
Subject: [PATCH 1/2] HDB - hide copied comparisons

---
 src/bbcode.py       | 35 +++++++++++++++++++++++++++++++++++
 src/trackers/HDB.py |  2 +-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/bbcode.py b/src/bbcode.py
index 042734505..84e5ed578 100644
--- a/src/bbcode.py
+++ b/src/bbcode.py
@@ -557,6 +557,41 @@ def convert_code_to_quote(self, desc):
         desc = desc.replace('[/code]', '[/quote]')
         return desc
 
+    def convert_comparison_to_hide(self, desc):
+        comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+        for comp in comparisons:
+            # Extract sources and count them
+            comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
+            comp_sources = re.split(r"\s*,\s*", comp_sources)
+            num_sources = len(comp_sources)
+
+            # Extract all image URLs
+            comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
+            comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg))", comp_content, flags=re.IGNORECASE)
+
+            # Arrange images in groups matching the number of sources
+            arranged_images = []
+            for i in range(0, len(comp_images), num_sources):
+                group = comp_images[i:i + num_sources]
+                if len(group) == num_sources:
+                    arranged_images.extend(group)
+
+            # Format the images as comma-separated groups
+            formatted_images = []
+            for i in range(0, len(arranged_images), num_sources):
+                group = arranged_images[i:i + num_sources]
+                formatted_images.append(', '.join(group))
+
+            # Join all groups with newlines
+            final_images = '\n'.join(formatted_images)
+
+            # Create the hide tag
+            sources_label = ' vs '.join(comp_sources)
+            new_bbcode = f"[hide={sources_label}]{final_images}[/hide]"
+            desc = desc.replace(comp, new_bbcode)
+
+        return desc
+
     def convert_comparison_to_collapse(self, desc, max_width):
         comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
         for comp in comparisons:
diff --git a/src/trackers/HDB.py b/src/trackers/HDB.py
index eea00fd28..5edcf2b98 100644
--- a/src/trackers/HDB.py
+++ b/src/trackers/HDB.py
@@ -538,7 +538,7 @@ async def edit_desc(self, meta):
         desc = desc.replace("[ol]", "").replace("[/ol]", "")
         desc = desc.replace("[*]", "* ")
         desc = bbcode.convert_spoiler_to_hide(desc)
-        desc = bbcode.convert_comparison_to_centered(desc, 1000)
+        desc = bbcode.convert_comparison_to_hide(desc)
         desc = re.sub(r"(\[img=\d+)]", "[img]", desc, flags=re.IGNORECASE)
         desc = re.sub(r"\[/size\]|\[size=\d+\]", "", desc, flags=re.IGNORECASE)
         descfile.write(desc)
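For illustration, a minimal sketch of the transformation this first patch performs, assuming the BBCODE class that hosts these helpers (the import path and the example URLs are hypothetical):

    from src.bbcode import BBCODE  # class name assumed from the surrounding methods

    bbcode = BBCODE()
    desc = (
        "[comparison=Source A, Source B]\n"
        "https://example.com/a1.png https://example.com/b1.png\n"
        "https://example.com/a2.png https://example.com/b2.png\n"
        "[/comparison]"
    )
    # With two sources, the four URLs pair up one comma-separated group per line,
    # wrapped in a labelled hide tag:
    # [hide=Source A vs Source B]https://example.com/a1.png, https://example.com/b1.png
    # https://example.com/a2.png, https://example.com/b2.png[/hide]
    print(bbcode.convert_comparison_to_hide(desc))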
From e953c698c092d2f74b5736a0175d359fa39106f8 Mon Sep 17 00:00:00 2001
From: Audionut
Date: Mon, 15 Sep 2025 22:20:13 +1000
Subject: [PATCH 2/2] rehost the images

---
 src/bbcode.py       |  64 ++++++++++++++----
 src/trackermeta.py  | 145 ++++++++++++++++++++++++++++++++++++++++
 src/trackers/HDB.py | 156 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 351 insertions(+), 14 deletions(-)

diff --git a/src/bbcode.py b/src/bbcode.py
index 84e5ed578..41f2c045a 100644
--- a/src/bbcode.py
+++ b/src/bbcode.py
@@ -557,33 +557,69 @@ def convert_code_to_quote(self, desc):
         desc = desc.replace('[/code]', '[/quote]')
         return desc
 
-    def convert_comparison_to_hide(self, desc):
+    def extract_comparison_images(self, desc):
+        comparison_images = {}
         comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+
         for comp in comparisons:
             # Extract sources and count them
             comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
             comp_sources = re.split(r"\s*,\s*", comp_sources)
             num_sources = len(comp_sources)
-
-            # Extract all image URLs
+            sources_label = ' vs '.join(comp_sources)
             comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
-            comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg))", comp_content, flags=re.IGNORECASE)
+            comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg|jpeg|gif|webp))", comp_content, flags=re.IGNORECASE)
 
-            # Arrange images in groups matching the number of sources
-            arranged_images = []
+            # Organize images into groups matching the number of sources
+            image_groups = []
             for i in range(0, len(comp_images), num_sources):
                 group = comp_images[i:i + num_sources]
                 if len(group) == num_sources:
-                    arranged_images.extend(group)
+                    image_groups.append(group)
 
-            # Format the images as comma-separated groups
-            formatted_images = []
-            for i in range(0, len(arranged_images), num_sources):
-                group = arranged_images[i:i + num_sources]
-                formatted_images.append(', '.join(group))
+            if image_groups:
+                comparison_images[sources_label] = image_groups
+
+        return comparison_images
+
+    def convert_comparison_to_hide(self, desc):
+        comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+        for comp in comparisons:
+            # Extract sources and count them
+            comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
+            comp_sources = re.split(r"\s*,\s*", comp_sources)
+            num_sources = len(comp_sources)
+
+            comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
 
-            # Join all groups with newlines
-            final_images = '\n'.join(formatted_images)
+            if '[url=' in comp_content and '[img]' in comp_content:
+                # Content has BBCode tags - extract them directly
+                bbcode_matches = re.findall(r'\[url=.*?\]\[img\].*?\[/img\]\[/url\]', comp_content)
+                formatted_images = []
+                for i in range(0, len(bbcode_matches), num_sources):
+                    group = bbcode_matches[i:i + num_sources]
+                    if len(group) == num_sources:
+                        formatted_images.append(', '.join(group))
+
+                final_images = '\n'.join(formatted_images)
+            else:
+                # Content has plain URLs
+                comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg))", comp_content, flags=re.IGNORECASE)
+
+                # Arrange images in groups matching the number of sources
+                arranged_images = []
+                for i in range(0, len(comp_images), num_sources):
+                    group = comp_images[i:i + num_sources]
+                    if len(group) == num_sources:
+                        arranged_images.extend(group)
+
+                # Format the images as comma-separated groups
+                formatted_images = []
+                for i in range(0, len(arranged_images), num_sources):
+                    group = arranged_images[i:i + num_sources]
+                    formatted_images.append(', '.join(group))
+
+                final_images = '\n'.join(formatted_images)
 
             # Create the hide tag
             sources_label = ' vs '.join(comp_sources)
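For reference, a sketch of the mapping extract_comparison_images builds for the same hypothetical two-source block: one label keyed to lists of URL groups, one group per screenshot set. Trailing groups with fewer images than sources are dropped by the len(group) == num_sources check.

    comparison_images = {
        "Source A vs Source B": [
            ["https://example.com/a1.png", "https://example.com/b1.png"],
            ["https://example.com/a2.png", "https://example.com/b2.png"],
        ]
    }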
diff --git a/src/trackermeta.py b/src/trackermeta.py
index b6f536873..12b7b69ab 100644
--- a/src/trackermeta.py
+++ b/src/trackermeta.py
@@ -172,6 +172,151 @@ async def bounded_check(image_dict):
     return valid_images
 
 
+async def download_comparison_images(comparison_images, meta):
+    if not comparison_images:
+        return {}
+
+    save_directory = f"{meta['base_dir']}/tmp/{meta['uuid']}/comparisons"
+    os.makedirs(save_directory, exist_ok=True)
+
+    timeout = aiohttp.ClientTimeout(total=30, connect=10, sock_connect=10, sock_read=10)
+    downloaded_comparisons = {}
+
+    semaphore = asyncio.Semaphore(2)
+
+    async def download_image_with_semaphore(url, filepath, skip_existing=True):
+        # Check if file already exists and is valid
+        if skip_existing and os.path.exists(filepath):
+            try:
+                if os.path.getsize(filepath) > 1024:  # At least 1KB
+                    from PIL import Image
+                    with Image.open(filepath) as img:
+                        img.verify()
+                    print(f"\r{' ' * 80}\rSkipping existing image: {os.path.basename(filepath)}", end="", flush=True)
+                    return filepath
+            except Exception:
+                # If file is corrupted, delete it and re-download
+                print(f"\r{' ' * 80}\rExisting file corrupted, re-downloading: {os.path.basename(filepath)}", end="", flush=True)
+                try:
+                    os.remove(filepath)
+                except Exception:
+                    pass
+
+        async with semaphore:
+            try:
+                async with aiohttp.ClientSession(timeout=timeout) as session:
+                    async with session.get(url) as response:
+                        if response.status == 200:
+                            image_content = await response.read()
+                            with open(filepath, "wb") as f:
+                                f.write(image_content)
+                            print(f"\r{' ' * 80}\rDownloaded comparison image: {os.path.basename(filepath)}", end="", flush=True)
+                            # Add 500ms delay after successful download
+                            await asyncio.sleep(0.5)
+                            return filepath
+                        else:
+                            console.print(f"[red]Failed to download comparison image {url}. Status: {response.status}")
+                            return None
+            except Exception as e:
+                console.print(f"[red]Error downloading comparison image {url}: {e}")
+                return None
+
+    failed_downloads = []
+
+    for comp_label, image_groups in comparison_images.items():
+        console.print(f"\n[cyan]Downloading comparison images for: {comp_label}")
+
+        safe_label = "".join(c for c in comp_label if c.isalnum() or c in (' ', '-', '_')).rstrip()
+        safe_label = safe_label.replace(' ', '_')
+        comp_dir = os.path.join(save_directory, safe_label)
+        os.makedirs(comp_dir, exist_ok=True)
+
+        downloaded_groups = []
+        download_tasks = []
+        task_info = []  # Track which task belongs to which group/image
+
+        for group_idx, image_group in enumerate(image_groups):
+            for img_idx, img_url in enumerate(image_group):
+                img_extension = os.path.splitext(img_url)[1] or '.jpg'
+                filename = f"group_{group_idx:03d}_img_{img_idx:02d}{img_extension}"
+                filepath = os.path.join(comp_dir, filename)
+
+                task = download_image_with_semaphore(img_url, filepath)
+                download_tasks.append(task)
+                task_info.append((group_idx, img_idx, img_url, filepath))
+
+        # Execute all download tasks concurrently (but limited by semaphore)
+        if download_tasks:
+            results = await asyncio.gather(*download_tasks, return_exceptions=True)
+
+            group_results = {}
+            for i, result in enumerate(results):
+                group_idx, img_idx, img_url, filepath = task_info[i]
+
+                if isinstance(result, Exception):
+                    console.print(f"[red]Download task failed with exception: {result}")
+                    failed_downloads.append((img_url, filepath))
+                    continue
+
+                if result:
+                    if group_idx not in group_results:
+                        group_results[group_idx] = {}
+                    group_results[group_idx][img_idx] = result
+                else:
+                    failed_downloads.append((img_url, filepath))
+
+            for group_idx in sorted(group_results.keys()):
+                downloaded_group = []
+                for img_idx in sorted(group_results[group_idx].keys()):
+                    downloaded_group.append(group_results[group_idx][img_idx])
+                if downloaded_group:
+                    downloaded_groups.append(downloaded_group)
+
+        if downloaded_groups:
+            downloaded_comparisons[comp_label] = downloaded_groups
+
+    print(f"\r{' ' * 80}\r", end="", flush=True)
+
+    # Retry failed downloads once
+    if failed_downloads:
+        console.print(f"[yellow]Retrying {len(failed_downloads)} failed downloads...")
+        retry_tasks = []
+        retry_info = []
+
+        for img_url, filepath in failed_downloads:
+            task = download_image_with_semaphore(img_url, filepath, skip_existing=False)
+            retry_tasks.append(task)
+            retry_info.append((img_url, filepath))
+
+        if retry_tasks:
+            retry_results = await asyncio.gather(*retry_tasks, return_exceptions=True)
+
+            successful_retries = 0
+            for i, result in enumerate(retry_results):
+                img_url, filepath = retry_info[i]
+
+                if not isinstance(result, Exception) and result:
+                    successful_retries += 1
+                    print(f"\r{' ' * 80}\rRetry successful: {os.path.basename(filepath)}", end="", flush=True)
+
+                    for comp_label, groups in downloaded_comparisons.items():
+                        comp_dir = os.path.dirname(filepath)
+                        if comp_label.replace(' ', '_').replace(' vs ', '_vs_') in comp_dir:
+                            if not groups:
+                                groups.append([result])
+                            else:
+                                groups[-1].append(result)
+                            break
+                else:
+                    console.print(f"[red]Retry failed for: {os.path.basename(filepath)}")
+
+            if successful_retries > 0:
+                print(f"\r{' ' * 80}\rSuccessfully retried {successful_retries} out of {len(failed_downloads)} failed downloads")
+
+    print("")
+
+    return downloaded_comparisons
+
+
 async def check_image_link(url, timeout=None):
     # Handle when pixhost url points to web_url and convert to raw_url
     if url.startswith("https://pixhost.to/show/"):
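Fed that mapping, the downloader writes each comparison into its own sanitized subdirectory and returns the same structure with local paths in place of URLs. Roughly, for the hypothetical block above (base_dir and uuid values invented):

    downloaded = await download_comparison_images(comparison_images, meta)
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_000_img_00.png
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_000_img_01.png
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_001_img_00.png
    # <base_dir>/tmp/<uuid>/comparisons/Source_A_vs_Source_B/group_001_img_01.png
    # downloaded mirrors the input dict, with URLs replaced by these local paths.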
diff --git a/src/trackers/HDB.py b/src/trackers/HDB.py
index 5edcf2b98..c465ba139 100644
--- a/src/trackers/HDB.py
+++ b/src/trackers/HDB.py
@@ -14,6 +14,7 @@
 from datetime import datetime
 from torf import Torrent
 from src.torrentcreate import CustomTorrent, torf_cb, create_torrent
+from src.trackermeta import download_comparison_images
 
 
 class HDB():
@@ -538,6 +539,17 @@ async def edit_desc(self, meta):
         desc = desc.replace("[ol]", "").replace("[/ol]", "")
         desc = desc.replace("[*]", "* ")
         desc = bbcode.convert_spoiler_to_hide(desc)
+
+        comparison_images = bbcode.extract_comparison_images(desc)
+        if comparison_images:
+            console.print(f"[cyan]Found {len(comparison_images)} comparison sections to rehost")
+
+            downloaded_comparisons = await download_comparison_images(comparison_images, meta)
+            if downloaded_comparisons:
+                # Rehost the downloaded comparison images
+                rehosted_comparisons = await self.rehost_comparison_images(downloaded_comparisons, meta)
+                desc = await self.replace_comparison_images_in_desc(desc, comparison_images, rehosted_comparisons)
+
         desc = bbcode.convert_comparison_to_hide(desc)
         desc = re.sub(r"(\[img=\d+)]", "[img]", desc, flags=re.IGNORECASE)
         desc = re.sub(r"\[/size\]|\[size=\d+\]", "", desc, flags=re.IGNORECASE)
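Note the ordering in edit_desc: extract_comparison_images reads the original [comparison] blocks, replace_comparison_images_in_desc swaps the plain URLs inside those blocks for rehosted [url=...][img]...[/img][/url] BBCode, and only then does convert_comparison_to_hide collapse the blocks into [hide] tags, so the hide conversion takes its BBCode-tag branch rather than the plain-URL branch. A sketch of the intermediate state (rehosted URLs invented for illustration):

    [comparison=Source A, Source B][url=https://img.hdbits.org/p1][img]https://img.hdbits.org/t1.png[/img][/url] [url=https://img.hdbits.org/p2][img]https://img.hdbits.org/t2.png[/img][/url]
    ...[/comparison]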
@@ -846,3 +858,147 @@ async def search_filename(self, search_term, search_file_folder, meta):
             console.print('[yellow]Could not find a matching release on HDB[/yellow]')
 
         return hdb_imdb, hdb_tvdb, hdb_name, hdb_torrenthash, hdb_description, hdb_id
+
+    async def rehost_comparison_images(self, downloaded_comparisons, meta):
+        rehosted_comparisons = {}
+
+        for comp_label, image_groups in downloaded_comparisons.items():
+            console.print(f"[green]Rehosting comparison images for: {comp_label}")
+            all_image_paths = []
+            group_structure = []  # Track which images belong to which group
+
+            for group_idx, image_group in enumerate(image_groups):
+                group_start = len(all_image_paths)
+                all_image_paths.extend(image_group)
+                group_structure.append((group_start, len(image_group)))
+
+            if all_image_paths:
+                bbcode_result = await self.upload_comparison_batch_to_hdb(all_image_paths, meta, comp_label)
+                if bbcode_result:
+                    bbcode_matches = re.findall(r'\[url=.*?\]\[img\].*?\[/img\]\[/url\]', bbcode_result)
+                    if bbcode_matches:
+                        num_sources = len(image_groups[0]) if image_groups else 4
+                        formatted_bbcode = ""
+                        for i in range(0, len(bbcode_matches), num_sources):
+                            line = " ".join(bbcode_matches[i:i+num_sources])
+                            if i + num_sources < len(bbcode_matches):
+                                formatted_bbcode += line + "\n"
+                            else:
+                                formatted_bbcode += line
+
+                        rehosted_comparisons[comp_label] = formatted_bbcode
+                        console.print(f"[green]Successfully rehosted {len(all_image_paths)} images for: {comp_label}")
+                    else:
+                        console.print(f"[red]No BBCode matches found in upload result for: {comp_label}")
+                else:
+                    console.print(f"[red]Failed to rehost images for: {comp_label}")
+
+        return rehosted_comparisons
+
+    async def upload_comparison_batch_to_hdb(self, image_paths, meta, comp_label):
+        # Split into smaller batches to avoid 413 Payload Too Large error
+        max_batch_size = 10
+        all_bbcode_results = []
+
+        for batch_start in range(0, len(image_paths), max_batch_size):
+            batch_end = min(batch_start + max_batch_size, len(image_paths))
+            batch_paths = image_paths[batch_start:batch_end]
+            batch_num = (batch_start // max_batch_size) + 1
+            total_batches = (len(image_paths) + max_batch_size - 1) // max_batch_size
+
+            if meta.get('debug'):
+                console.print(f"[cyan]Uploading batch {batch_num}/{total_batches} ({len(batch_paths)} images) for: {comp_label}")
+
+            bbcode_result = await self.upload_single_batch_to_hdb(batch_paths, meta, comp_label, batch_num)
+            if bbcode_result:
+                all_bbcode_results.append(bbcode_result)
+            else:
+                console.print(f"[red]Failed to upload batch {batch_num} for: {comp_label}")
+
+        if all_bbcode_results:
+            combined_bbcode = "\n".join(all_bbcode_results)
+            console.print(f"[green]Successfully uploaded all batches for: {comp_label}")
+            return combined_bbcode
+        else:
+            console.print(f"[red]All upload batches failed for: {comp_label}")
+            return None
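A quick worked example of the batch split above, assuming 24 images for one comparison and the max_batch_size of 10:

    total_batches = (24 + 10 - 1) // 10  # -> 3
    # batch 1 -> images[0:10], batch 2 -> images[10:20], batch 3 -> images[20:24]

Each batch goes out as a single multipart POST, and the per-batch BBCode results are joined with newlines before rehost_comparison_images regroups them by source count.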
+    async def upload_single_batch_to_hdb(self, image_paths, meta, comp_label, batch_num, retry_attempt=0):
+        max_retries = 2
+        timeout_codes = [504, 524, 408, 502, 503]  # timeout/server error codes
+
+        try:
+            url = "https://img.hdbits.org/upload_api.php"
+            data = {
+                'username': self.username,
+                'passkey': self.passkey,
+                'galleryoption': '1',
+                'galleryname': f"{meta['name']} - {comp_label} - Batch {batch_num}",
+                'thumbsize': 'w100'
+            }
+
+            files = {}
+            for i, image_path in enumerate(image_paths):
+                try:
+                    filename = os.path.basename(image_path)
+                    files[f'images_files[{i}]'] = (filename, open(image_path, 'rb'), 'image/png')
+                except Exception as e:
+                    console.print(f"[red]Failed to open {image_path}: {e}")
+                    continue
+
+            if not files:
+                console.print(f"[red]No files to upload in batch {batch_num}")
+                return None
+
+            response = requests.post(url, data=data, files=files, timeout=120)
+
+            if response.status_code == 200:
+                if meta.get('debug'):
+                    console.print(f"[green]Batch {batch_num} upload successful ({len(files)} images)")
+                return response.text
+            elif response.status_code in timeout_codes and retry_attempt < max_retries:
+                console.print(f"[yellow]Batch {batch_num} failed with {response.status_code}, retrying ({retry_attempt + 1}/{max_retries})...")
+                # Close current files before retry
+                for f in files.values():
+                    if hasattr(f, '__len__') and len(f) > 1:
+                        f[1].close()
+                # Wait a bit before retry
+                await asyncio.sleep(5)
+                return await self.upload_single_batch_to_hdb(image_paths, meta, comp_label, batch_num, retry_attempt + 1)
+            else:
+                console.print(f"[red]Batch {batch_num} upload failed with status code {response.status_code}")
+                return None
+
+        except requests.exceptions.Timeout:
+            if retry_attempt < max_retries:
+                console.print(f"[yellow]Batch {batch_num} timed out, retrying ({retry_attempt + 1}/{max_retries})...")
+                await asyncio.sleep(5)
+                return await self.upload_single_batch_to_hdb(image_paths, meta, comp_label, batch_num, retry_attempt + 1)
+            else:
+                console.print(f"[red]Batch {batch_num} failed after {max_retries} timeout retries")
+                return None
+        except requests.RequestException as e:
+            console.print(f"[red]HTTP Request failed for batch {batch_num}: {e}")
+            return None
+        finally:
+            # Close files to prevent resource leaks
+            for f in files.values():
+                if hasattr(f, '__len__') and len(f) > 1:
+                    f[1].close()
+
+    async def replace_comparison_images_in_desc(self, desc, original_comparisons, rehosted_comparisons):
+        comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
+
+        for comp in comparisons:
+            comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
+            comp_sources = re.split(r"\s*,\s*", comp_sources)
+            sources_label = ' vs '.join(comp_sources)
+
+            if sources_label in rehosted_comparisons:
+                rehosted_bbcode = rehosted_comparisons[sources_label]
+                if rehosted_bbcode:
+                    new_comp = f"[comparison={', '.join(comp_sources)}]{rehosted_bbcode}[/comparison]"
+                    desc = desc.replace(comp, new_comp)
+                    console.print(f"[green]Replaced comparison block for: {sources_label}")
+
+        return desc
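Putting the pieces together, a sketch of what rehost_comparison_images hands to replace_comparison_images_in_desc for the hypothetical two-source comparison (gallery and thumbnail URLs invented for illustration):

    rehosted_comparisons = {
        "Source A vs Source B":
            "[url=https://img.hdbits.org/p1][img]https://img.hdbits.org/t1.png[/img][/url] "
            "[url=https://img.hdbits.org/p2][img]https://img.hdbits.org/t2.png[/img][/url]\n"
            "[url=https://img.hdbits.org/p3][img]https://img.hdbits.org/t3.png[/img][/url] "
            "[url=https://img.hdbits.org/p4][img]https://img.hdbits.org/t4.png[/img][/url]"
    }

The label must match the ' vs '-joined source list exactly, since replace_comparison_images_in_desc keys its substitution on sources_label.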