Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.MD
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0).

## [2.6.38] - 2026-04-06

### Fixed

- **Fix newly discovered files marked completed without being scanned**: Parallel chunk scanning (v2.6.32+) passed the parent `PixelProbe` instance to all chunk worker threads. Each `PixelProbe` uses a `StaticPool` with a single DB connection, so when three or more chunk threads shared it concurrently, `_save_to_cache()` writes were silently lost due to transaction interference. Affected files ended up with `scan_status='completed'` but `scan_date=NULL` and `scan_tool=NULL`. Fixed by creating a per-thread `PixelProbe` instance in each chunk worker, giving each thread its own isolated DB connection. Note: the v2.6.36 raw SQL `UPDATE SET scan_status = 'completed'` fix had masked this issue by marking unscanned files as done.

---

## [2.6.37] - 2026-04-06

### Fixed
Expand Down
29 changes: 27 additions & 2 deletions pixelprobe/services/scan_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -1457,14 +1457,21 @@ def _parallel_scan_chunks(self, checker: PixelProbe, chunks: List[ScanChunk],
files_scanned_lock = threading.Lock()
discovery_lock = threading.Lock()
failed_chunks = [] # Track chunks that fail during parallel processing for retry

# Create progress tracker for scan
progress_tracker = ProgressTracker('scan')

# Capture Flask app for worker threads
from flask import current_app
app = current_app._get_current_object()

# Thread-local PixelProbe instances. The parent checker uses StaticPool
# (single DB connection) which causes data races when multiple chunk
# threads share it. Each thread gets its own instance via threading.local().
chunk_thread_local = threading.local()
thread_checkers = [] # Track all instances for connection cleanup
thread_checkers_lock = threading.Lock()

def scan_chunk(chunk_db_id, chunk_id_str):
"""Process a single chunk in a worker thread.

Expand All @@ -1490,9 +1497,19 @@ def scan_chunk(chunk_db_id, chunk_id_str):
pass
return chunk_id_str, 0

if not hasattr(chunk_thread_local, 'checker'):
chunk_thread_local.checker = PixelProbe(
database_path=self.database_uri,
excluded_paths=checker.excluded_paths,
excluded_extensions=checker.excluded_extensions,
excluded_patterns=checker.excluded_patterns
)
with thread_checkers_lock:
thread_checkers.append(chunk_thread_local.checker)

chunk_file_workers = 1 if chunk_workers > 1 else num_workers
try:
self._scan_chunk_files(thread_chunk, checker, force_rescan, 0, 0,
self._scan_chunk_files(thread_chunk, chunk_thread_local.checker, force_rescan, 0, 0,
thread_scan_state, num_workers=chunk_file_workers,
use_atomic_increment=True)
except Exception as e:
Expand Down Expand Up @@ -1639,6 +1656,14 @@ def scan_chunk(chunk_db_id, chunk_id_str):
except Exception as e:
logger.error(f"Chunk {failed_cid} failed on retry: {e}")

# Dispose all thread-local PixelProbe DB engines to release connections
for tc in thread_checkers:
try:
if tc._db_engine:
tc._db_engine.dispose()
except Exception:
pass

# Complete scan
if self.scan_cancelled:
self._handle_scan_cancellation(scan_state)
Expand Down
2 changes: 1 addition & 1 deletion pixelprobe/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Default version - this is the single source of truth


_DEFAULT_VERSION = '2.6.37'
_DEFAULT_VERSION = '2.6.38'


# Allow override via environment variable for CI/CD, but default to the hardcoded version
Expand Down
Loading