71 changes: 71 additions & 0 deletions src/bbcode.py
@@ -557,6 +557,77 @@ def convert_code_to_quote(self, desc):
desc = desc.replace('[/code]', '[/quote]')
return desc

def extract_comparison_images(self, desc):
comparison_images = {}
comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)

for comp in comparisons:
# Extract sources and count them
comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
comp_sources = re.split(r"\s*,\s*", comp_sources)
num_sources = len(comp_sources)
sources_label = ' vs '.join(comp_sources)
comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg|jpeg|gif|webp))", comp_content, flags=re.IGNORECASE)

# Organize images into groups matching the number of sources
image_groups = []
for i in range(0, len(comp_images), num_sources):
group = comp_images[i:i + num_sources]
if len(group) == num_sources:
image_groups.append(group)

if image_groups:
comparison_images[sources_label] = image_groups

return comparison_images
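
A minimal usage sketch (the description and the `bbcode` instance name are illustrative, not from the PR):

    desc = (
        "[comparison=Source A, Source B]\n"
        "https://example.com/a1.png https://example.com/b1.png\n"
        "https://example.com/a2.png https://example.com/b2.png\n"
        "[/comparison]"
    )
    bbcode.extract_comparison_images(desc)
    # -> {'Source A vs Source B': [
    #        ['https://example.com/a1.png', 'https://example.com/b1.png'],
    #        ['https://example.com/a2.png', 'https://example.com/b2.png']]}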

def convert_comparison_to_hide(self, desc):
comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
for comp in comparisons:
# Extract sources and count them
comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
comp_sources = re.split(r"\s*,\s*", comp_sources)
num_sources = len(comp_sources)

comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')

if '[url=' in comp_content and '[img]' in comp_content:
# Content has BBCode tags - extract them directly
bbcode_matches = re.findall(r'\[url=.*?\]\[img\].*?\[/img\]\[/url\]', comp_content)
formatted_images = []
for i in range(0, len(bbcode_matches), num_sources):
group = bbcode_matches[i:i + num_sources]
if len(group) == num_sources:
formatted_images.append(', '.join(group))

final_images = '\n'.join(formatted_images)
            else:
                # Content has plain URLs; match the same extension set used by
                # extract_comparison_images
                comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg|jpeg|gif|webp))", comp_content, flags=re.IGNORECASE)

                # Join each complete group of URLs into a comma-separated row,
                # dropping any trailing partial group
                formatted_images = []
                for i in range(0, len(comp_images), num_sources):
                    group = comp_images[i:i + num_sources]
                    if len(group) == num_sources:
                        formatted_images.append(', '.join(group))

                final_images = '\n'.join(formatted_images)

# Create the hide tag
sources_label = ' vs '.join(comp_sources)
new_bbcode = f"[hide={sources_label}]{final_images}[/hide]"
desc = desc.replace(comp, new_bbcode)

return desc
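
Fed the same hypothetical description, this method would replace the [comparison] block with a [hide] tag, one comma-separated row per group:

    bbcode.convert_comparison_to_hide(desc)
    # -> '[hide=Source A vs Source B]'
    #    'https://example.com/a1.png, https://example.com/b1.png\n'
    #    'https://example.com/a2.png, https://example.com/b2.png'
    #    '[/hide]'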

def convert_comparison_to_collapse(self, desc, max_width):
comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
for comp in comparisons:
145 changes: 145 additions & 0 deletions src/trackermeta.py
@@ -172,6 +172,151 @@ async def bounded_check(image_dict):
return valid_images


async def download_comparison_images(comparison_images, meta):
if not comparison_images:
return {}

save_directory = f"{meta['base_dir']}/tmp/{meta['uuid']}/comparisons"
os.makedirs(save_directory, exist_ok=True)

timeout = aiohttp.ClientTimeout(total=30, connect=10, sock_connect=10, sock_read=10)
downloaded_comparisons = {}

    # Cap concurrency at two simultaneous downloads
    semaphore = asyncio.Semaphore(2)

async def download_image_with_semaphore(url, filepath, skip_existing=True):
# Check if file already exists and is valid
if skip_existing and os.path.exists(filepath):
try:
if os.path.getsize(filepath) > 1024: # At least 1KB
from PIL import Image
with Image.open(filepath) as img:
img.verify()
print(f"\r{' ' * 80}\rSkipping existing image: {os.path.basename(filepath)}", end="", flush=True)
return filepath
except Exception:
# If file is corrupted, delete it and re-download
print(f"\r{' ' * 80}\rExisting file corrupted, re-downloading: {os.path.basename(filepath)}", end="", flush=True)
try:
os.remove(filepath)
except Exception:
pass

async with semaphore:
try:
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(url) as response:
if response.status == 200:
image_content = await response.read()
with open(filepath, "wb") as f:
f.write(image_content)
print(f"\r{' ' * 80}\rDownloaded comparison image: {os.path.basename(filepath)}", end="", flush=True)
# Add 500ms delay after successful download
await asyncio.sleep(0.5)
return filepath
else:
console.print(f"[red]Failed to download comparison image {url}. Status: {response.status}")
return None
except Exception as e:
console.print(f"[red]Error downloading comparison image {url}: {e}")
return None

failed_downloads = []

for comp_label, image_groups in comparison_images.items():
console.print(f"\n[cyan]Downloading comparison images for: {comp_label}")

safe_label = "".join(c for c in comp_label if c.isalnum() or c in (' ', '-', '_')).rstrip()
safe_label = safe_label.replace(' ', '_')
comp_dir = os.path.join(save_directory, safe_label)
os.makedirs(comp_dir, exist_ok=True)

downloaded_groups = []
download_tasks = []
task_info = [] # Track which task belongs to which group/image

for group_idx, image_group in enumerate(image_groups):
for img_idx, img_url in enumerate(image_group):
img_extension = os.path.splitext(img_url)[1] or '.jpg'
filename = f"group_{group_idx:03d}_img_{img_idx:02d}{img_extension}"
filepath = os.path.join(comp_dir, filename)
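                # e.g. {base_dir}/tmp/{uuid}/comparisons/Source_A_vs_Source_B/group_000_img_00.png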

task = download_image_with_semaphore(img_url, filepath)
download_tasks.append(task)
task_info.append((group_idx, img_idx, img_url, filepath))

# Execute all download tasks concurrently (but limited by semaphore)
if download_tasks:
results = await asyncio.gather(*download_tasks, return_exceptions=True)
group_results = {}
for i, result in enumerate(results):
group_idx, img_idx, img_url, filepath = task_info[i]

if isinstance(result, Exception):
console.print(f"[red]Download task failed with exception: {result}")
failed_downloads.append((img_url, filepath))
continue

if result:
if group_idx not in group_results:
group_results[group_idx] = {}
group_results[group_idx][img_idx] = result
else:
failed_downloads.append((img_url, filepath))

for group_idx in sorted(group_results.keys()):
downloaded_group = []
for img_idx in sorted(group_results[group_idx].keys()):
downloaded_group.append(group_results[group_idx][img_idx])
if downloaded_group:
downloaded_groups.append(downloaded_group)

if downloaded_groups:
downloaded_comparisons[comp_label] = downloaded_groups

print(f"\r{' ' * 80}\r", end="", flush=True)

# Retry failed downloads once
if failed_downloads:
console.print(f"[yellow]Retrying {len(failed_downloads)} failed downloads...")
retry_tasks = []
retry_info = []

for img_url, filepath in failed_downloads:
task = download_image_with_semaphore(img_url, filepath, skip_existing=False)
retry_tasks.append(task)
retry_info.append((img_url, filepath))

if retry_tasks:
retry_results = await asyncio.gather(*retry_tasks, return_exceptions=True)

successful_retries = 0
for i, result in enumerate(retry_results):
img_url, filepath = retry_info[i]

if not isinstance(result, Exception) and result:
successful_retries += 1
print(f"\r{' ' * 80}\rRetry successful: {os.path.basename(filepath)}", end="", flush=True)

                    # Attach the retried image to its comparison group, matching
                    # on the same sanitized label used to name the directory
                    retry_dir = os.path.dirname(filepath)
                    for comp_label, groups in downloaded_comparisons.items():
                        safe_label = "".join(c for c in comp_label if c.isalnum() or c in (' ', '-', '_')).rstrip().replace(' ', '_')
                        if safe_label in retry_dir:
                            if not groups:
                                groups.append([result])
                            else:
                                groups[-1].append(result)
                            break
else:
console.print(f"[red]Retry failed for: {os.path.basename(filepath)}")

if successful_retries > 0:
print(f"\r{' ' * 80}\rSuccessfully retried {successful_retries} out of {len(failed_downloads)} failed downloads")

print("")

return downloaded_comparisons
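
A sketch of how the coroutine might be driven (the `meta` values and the `bbcode`/`desc` names carry over from the earlier sketch and are assumptions, not part of the PR):

    import asyncio

    async def main():
        meta = {"base_dir": "/tmp/upload-assistant", "uuid": "demo-run"}  # hypothetical paths
        comparison_images = bbcode.extract_comparison_images(desc)
        downloaded = await download_comparison_images(comparison_images, meta)
        for label, groups in downloaded.items():
            print(f"{label}: {len(groups)} image group(s) downloaded")

    asyncio.run(main())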


async def check_image_link(url, timeout=None):
# Handle when pixhost url points to web_url and convert to raw_url
if url.startswith("https://pixhost.to/show/"):