71 changes: 71 additions & 0 deletions src/bbcode.py
@@ -557,6 +557,77 @@ def convert_code_to_quote(self, desc):
desc = desc.replace('[/code]', '[/quote]')
return desc

def extract_comparison_images(self, desc):
comparison_images = {}
comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)

for comp in comparisons:
# Extract sources and count them
comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
comp_sources = re.split(r"\s*,\s*", comp_sources)
num_sources = len(comp_sources)
sources_label = ' vs '.join(comp_sources)
comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')
comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg|jpeg|gif|webp))", comp_content, flags=re.IGNORECASE)

# Organize images into groups matching the number of sources
image_groups = []
for i in range(0, len(comp_images), num_sources):
group = comp_images[i:i + num_sources]
if len(group) == num_sources:
image_groups.append(group)

if image_groups:
comparison_images[sources_label] = image_groups

return comparison_images
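
A minimal usage sketch (the description and the `bbcode` instance name are illustrative, not from the PR):

    desc = (
        "[comparison=Source A, Source B]\n"
        "https://example.com/a1.png https://example.com/b1.png\n"
        "https://example.com/a2.png https://example.com/b2.png\n"
        "[/comparison]"
    )
    bbcode.extract_comparison_images(desc)
    # -> {'Source A vs Source B': [
    #        ['https://example.com/a1.png', 'https://example.com/b1.png'],
    #        ['https://example.com/a2.png', 'https://example.com/b2.png']]}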

def convert_comparison_to_hide(self, desc):
comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
for comp in comparisons:
# Extract sources and count them
comp_sources = comp.split(']', 1)[0].replace('[comparison=', '').strip()
comp_sources = re.split(r"\s*,\s*", comp_sources)
num_sources = len(comp_sources)

comp_content = comp.split(']', 1)[1].replace('[/comparison]', '')

if '[url=' in comp_content and '[img]' in comp_content:
# Content has BBCode tags - extract them directly
bbcode_matches = re.findall(r'\[url=.*?\]\[img\].*?\[/img\]\[/url\]', comp_content)
formatted_images = []
for i in range(0, len(bbcode_matches), num_sources):
group = bbcode_matches[i:i + num_sources]
if len(group) == num_sources:
formatted_images.append(', '.join(group))

final_images = '\n'.join(formatted_images)
            else:
                # Content has plain URLs; match the same extension set used by
                # extract_comparison_images
                comp_images = re.findall(r"(https?:\/\/[^\s\[\]]+\.(?:png|jpg|jpeg|gif|webp))", comp_content, flags=re.IGNORECASE)

                # Join each complete group of URLs into a comma-separated row,
                # dropping any trailing partial group
                formatted_images = []
                for i in range(0, len(comp_images), num_sources):
                    group = comp_images[i:i + num_sources]
                    if len(group) == num_sources:
                        formatted_images.append(', '.join(group))

                final_images = '\n'.join(formatted_images)

# Create the hide tag
sources_label = ' vs '.join(comp_sources)
new_bbcode = f"[hide={sources_label}]{final_images}[/hide]"
desc = desc.replace(comp, new_bbcode)

return desc
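
Fed the same hypothetical description, this method would replace the [comparison] block with a [hide] tag, one comma-separated row per group:

    bbcode.convert_comparison_to_hide(desc)
    # -> '[hide=Source A vs Source B]'
    #    'https://example.com/a1.png, https://example.com/b1.png\n'
    #    'https://example.com/a2.png, https://example.com/b2.png'
    #    '[/hide]'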

def convert_comparison_to_collapse(self, desc, max_width):
comparisons = re.findall(r"\[comparison=[\s\S]*?\[\/comparison\]", desc)
for comp in comparisons:
145 changes: 145 additions & 0 deletions src/trackermeta.py
@@ -172,6 +172,151 @@ async def bounded_check(image_dict):
return valid_images


async def download_comparison_images(comparison_images, meta):
if not comparison_images:
return {}

save_directory = f"{meta['base_dir']}/tmp/{meta['uuid']}/comparisons"
os.makedirs(save_directory, exist_ok=True)

timeout = aiohttp.ClientTimeout(total=30, connect=10, sock_connect=10, sock_read=10)
downloaded_comparisons = {}

    # Cap concurrency at two simultaneous downloads
    semaphore = asyncio.Semaphore(2)

async def download_image_with_semaphore(url, filepath, skip_existing=True):
# Check if file already exists and is valid
if skip_existing and os.path.exists(filepath):
try:
if os.path.getsize(filepath) > 1024: # At least 1KB
from PIL import Image
with Image.open(filepath) as img:
img.verify()
print(f"\r{' ' * 80}\rSkipping existing image: {os.path.basename(filepath)}", end="", flush=True)
return filepath
except Exception:
# If file is corrupted, delete it and re-download
print(f"\r{' ' * 80}\rExisting file corrupted, re-downloading: {os.path.basename(filepath)}", end="", flush=True)
try:
os.remove(filepath)
except Exception:
pass

async with semaphore:
try:
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(url) as response:
if response.status == 200:
image_content = await response.read()
with open(filepath, "wb") as f:
f.write(image_content)
print(f"\r{' ' * 80}\rDownloaded comparison image: {os.path.basename(filepath)}", end="", flush=True)
# Add 500ms delay after successful download
await asyncio.sleep(0.5)
return filepath
else:
console.print(f"[red]Failed to download comparison image {url}. Status: {response.status}")
return None
except Exception as e:
console.print(f"[red]Error downloading comparison image {url}: {e}")
return None

failed_downloads = []

for comp_label, image_groups in comparison_images.items():
console.print(f"\n[cyan]Downloading comparison images for: {comp_label}")

safe_label = "".join(c for c in comp_label if c.isalnum() or c in (' ', '-', '_')).rstrip()
safe_label = safe_label.replace(' ', '_')
comp_dir = os.path.join(save_directory, safe_label)
os.makedirs(comp_dir, exist_ok=True)

downloaded_groups = []
download_tasks = []
task_info = [] # Track which task belongs to which group/image

for group_idx, image_group in enumerate(image_groups):
for img_idx, img_url in enumerate(image_group):
img_extension = os.path.splitext(img_url)[1] or '.jpg'
filename = f"group_{group_idx:03d}_img_{img_idx:02d}{img_extension}"
filepath = os.path.join(comp_dir, filename)
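                # e.g. {base_dir}/tmp/{uuid}/comparisons/Source_A_vs_Source_B/group_000_img_00.png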

task = download_image_with_semaphore(img_url, filepath)
download_tasks.append(task)
task_info.append((group_idx, img_idx, img_url, filepath))

# Execute all download tasks concurrently (but limited by semaphore)
if download_tasks:
results = await asyncio.gather(*download_tasks, return_exceptions=True)
group_results = {}
for i, result in enumerate(results):
group_idx, img_idx, img_url, filepath = task_info[i]

if isinstance(result, Exception):
console.print(f"[red]Download task failed with exception: {result}")
failed_downloads.append((img_url, filepath))
continue

if result:
if group_idx not in group_results:
group_results[group_idx] = {}
group_results[group_idx][img_idx] = result
else:
failed_downloads.append((img_url, filepath))

for group_idx in sorted(group_results.keys()):
downloaded_group = []
for img_idx in sorted(group_results[group_idx].keys()):
downloaded_group.append(group_results[group_idx][img_idx])
if downloaded_group:
downloaded_groups.append(downloaded_group)

if downloaded_groups:
downloaded_comparisons[comp_label] = downloaded_groups

print(f"\r{' ' * 80}\r", end="", flush=True)

# Retry failed downloads once
if failed_downloads:
console.print(f"[yellow]Retrying {len(failed_downloads)} failed downloads...")
retry_tasks = []
retry_info = []

for img_url, filepath in failed_downloads:
task = download_image_with_semaphore(img_url, filepath, skip_existing=False)
retry_tasks.append(task)
retry_info.append((img_url, filepath))

if retry_tasks:
retry_results = await asyncio.gather(*retry_tasks, return_exceptions=True)

successful_retries = 0
for i, result in enumerate(retry_results):
img_url, filepath = retry_info[i]

if not isinstance(result, Exception) and result:
successful_retries += 1
print(f"\r{' ' * 80}\rRetry successful: {os.path.basename(filepath)}", end="", flush=True)

                    # Attach the retried image to its comparison group, matching
                    # on the same sanitized label used to name the directory
                    retry_dir = os.path.dirname(filepath)
                    for comp_label, groups in downloaded_comparisons.items():
                        safe_label = "".join(c for c in comp_label if c.isalnum() or c in (' ', '-', '_')).rstrip().replace(' ', '_')
                        if safe_label in retry_dir:
                            if not groups:
                                groups.append([result])
                            else:
                                groups[-1].append(result)
                            break
else:
console.print(f"[red]Retry failed for: {os.path.basename(filepath)}")

if successful_retries > 0:
print(f"\r{' ' * 80}\rSuccessfully retried {successful_retries} out of {len(failed_downloads)} failed downloads")

print("")

return downloaded_comparisons
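
A sketch of how the coroutine might be driven (the `meta` values and the `bbcode`/`desc` names carry over from the earlier sketch and are assumptions, not part of the PR):

    import asyncio

    async def main():
        meta = {"base_dir": "/tmp/upload-assistant", "uuid": "demo-run"}  # hypothetical paths
        comparison_images = bbcode.extract_comparison_images(desc)
        downloaded = await download_comparison_images(comparison_images, meta)
        for label, groups in downloaded.items():
            print(f"{label}: {len(groups)} image group(s) downloaded")

    asyncio.run(main())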


async def check_image_link(url, timeout=None):
# Handle when pixhost url points to web_url and convert to raw_url
if url.startswith("https://pixhost.to/show/"):