Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
801a752
feat: add bio-bait spam detection with profile bio scanning
rezhajulio May 1, 2026
b764bed
chore: sanitize spam example references
rezhajulio May 1, 2026
7097a1e
fix: narrow promo hints and word-boundary matching for bio link detec…
rezhajulio May 14, 2026
811c4ce
feat: add bio-bait monitor-only mode with owner alerts and metrics
rezhajulio May 14, 2026
f36a3a0
fix: bio-bait review - trusted bypass, monitor-only alert semantics, …
rezhajulio May 18, 2026
6d67a41
Fix bio-bait routing: remove TEXT|CAPTION filter restriction
rezhajulio May 18, 2026
8e3bb01
fix: bio-bait review - cache eviction, f-string logging, shared white…
rezhajulio May 18, 2026
b2fa84b
chore: ignore local worktrees directory
rezhajulio May 22, 2026
3202610
feat(config): add strict plugin toggle validation
rezhajulio May 22, 2026
125f731
fix(config): apply plugins_default in single-group fallback
rezhajulio May 22, 2026
70dffb1
feat(plugins): add plugin contracts and toggle resolver
rezhajulio May 22, 2026
bcfda23
fix(plugins): align manifest types and export definitions
rezhajulio May 22, 2026
c65f563
feat(plugins): add built-in wrappers with fixed registration order
rezhajulio May 22, 2026
eece630
feat(main): register handlers and jobs via plugin manager
rezhajulio May 22, 2026
8d1ce65
feat(plugins): enforce per-group plugin enable map at runtime
rezhajulio May 22, 2026
90a8c2d
fix(plugins): apply runtime gating to group-scoped handlers
rezhajulio May 22, 2026
075a673
docs(config): add plugin toggle examples for env and groups
rezhajulio May 22, 2026
ec2f004
refactor(plugins): unify plugin name registry to definitions.py
rezhajulio May 22, 2026
d4145d0
fix: resolve DeepSeek V4 Pro review issues for plugin system
rezhajulio May 22, 2026
7177a95
fix: preserve admin cache on startup failure via preload_admin_ids
rezhajulio May 31, 2026
1e11192
fix: clone handlers in captcha plugin to prevent mutation
rezhajulio May 31, 2026
54e50f2
docs: add ADMIN_COMMANDS constant to document guard_plugin skip
rezhajulio May 31, 2026
062268c
fix: add negative caching for bio fetch failures (5 min TTL)
rezhajulio May 31, 2026
24f0815
fix: clarify PLUGINS_DEFAULT test expectations for env vs constructor
rezhajulio May 31, 2026
c734fce
chore: remove unused import in test
rezhajulio May 31, 2026
c1e7b3d
chore: add docs/ to gitignore, update project guidelines
rezhajulio May 31, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,29 @@ DUPLICATE_SPAM_MIN_LENGTH=20
# 0.95 catches minor edits, 0.97 only near-exact copies, 0.90 is more aggressive
DUPLICATE_SPAM_SIMILARITY=0.95

# Enable/disable bio bait detection (true/false)
BIO_BAIT_ENABLED=true

# Monitor-only mode for bio bait detection (true/false)
# When true, matches are not deleted, the user is not restricted, and nothing is posted
# to the warning topic; the bot only records metrics and sends the owner alert.
BIO_BAIT_MONITOR_ONLY=false

# Owner/admin chat ID to receive bio bait monitor alerts (optional)
# Example: 57747812
# BIO_BAIT_ALERT_CHAT_ID=57747812

# Path to groups.json for multi-group support (optional)
# If this file exists, per-group settings are loaded from it instead of the
# GROUP_ID/WARNING_TOPIC_ID/etc. fields above. See groups.json.example.
# GROUPS_CONFIG_PATH=groups.json

# Default plugin enable/disable map for all groups (optional, single-group mode)
# JSON object mapping built-in plugin names to booleans.
# Plugins not listed inherit their built-in default (enabled).
# Keys must match known plugin names (e.g. "captcha", "dm", "verify").
# Example: PLUGINS_DEFAULT={"captcha":true,"dm":false}
# PLUGINS_DEFAULT={"captcha":true,"dm":false}

# Logfire Configuration (optional - for production logging)
# Get your token from https://logfire.pydantic.dev
LOGFIRE_TOKEN=your_logfire_token_here
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ __pycache__/
data/
.vscode
# AGENTS.md
.worktrees/

# Agent/planning docs
docs/
4 changes: 4 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,7 @@ if user.id not in admin_ids:
- Captcha callback data encodes group_id: `captcha_verify_{group_id}_{user_id}` to avoid ambiguity
- Scheduler iterates all groups with per-group exception isolation
- DM handler scans all groups in registry for user membership and unrestriction

## Policy

- Never mention AI usage, code generation tools, or automated assistance in commit messages, PR descriptions, code comments, or documentation
22 changes: 19 additions & 3 deletions groups.json.example
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,15 @@
"duplicate_spam_window_seconds": 120,
"duplicate_spam_threshold": 2,
"duplicate_spam_min_length": 20,
"duplicate_spam_similarity": 0.95
"duplicate_spam_similarity": 0.95,
"bio_bait_enabled": true,
"bio_bait_monitor_only": false,
"bio_bait_alert_chat_id": null,
"plugins": {
"captcha": false,
"dm": true,
"verify": true
}
},
{
"group_id": -1009876543210,
Expand All @@ -33,6 +41,14 @@
"duplicate_spam_window_seconds": 60,
"duplicate_spam_threshold": 2,
"duplicate_spam_min_length": 20,
"duplicate_spam_similarity": 0.90
"duplicate_spam_similarity": 0.90,
"bio_bait_enabled": true,
"bio_bait_monitor_only": false,
"bio_bait_alert_chat_id": null,
"plugins": {
"contact_spam": false,
"duplicate_spam": false,
"profile_monitor": true
}
}
]
]
50 changes: 44 additions & 6 deletions src/bot/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,19 @@
(production, staging) via the BOT_ENV environment variable.
"""

import json
import logging
import os
from datetime import timedelta
from functools import lru_cache
from pathlib import Path

from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

logger = logging.getLogger(__name__)
from bot.group_config import KNOWN_PLUGINS

logger = logging.getLogger(__name__)

def get_env_file() -> str | None:
"""
Expand All @@ -32,7 +35,7 @@ def get_env_file() -> str | None:
"staging": ".env.staging",
}
env_file = env_files.get(env, ".env")

# Return path only if file exists, otherwise return None
# Pydantic will load from environment variables if no .env file
if Path(env_file).exists():
Expand All @@ -42,7 +45,6 @@ def get_env_file() -> str | None:
logger.debug(f"No .env file found at {env_file}, loading from environment variables")
return None


class Settings(BaseSettings):
"""
Application settings loaded from environment variables.
Expand Down Expand Up @@ -85,19 +87,52 @@ class Settings(BaseSettings):
duplicate_spam_threshold: int = 2
duplicate_spam_min_length: int = 20
duplicate_spam_similarity: float = 0.95
bio_bait_enabled: bool = True
bio_bait_monitor_only: bool = False
bio_bait_alert_chat_id: int | None = None
groups_config_path: str = "groups.json"
logfire_token: str | None = None
logfire_service_name: str = "pythonid-bot"
logfire_environment: str = "production"
logfire_enabled: bool = True
log_level: str = "INFO"
plugins_default: dict[str, bool] = {}

model_config = SettingsConfigDict(
env_file=get_env_file(),
env_file_encoding="utf-8",
extra="ignore",
)

@field_validator("plugins_default", mode="before")
@classmethod
def parse_and_validate_plugins_default(cls, v: object) -> dict[str, bool]:
    """
    Parse and validate the PLUGINS_DEFAULT setting.

    The raw value may be an already-decoded dict or a JSON string. A blank
    string, or any value that is not a dict, str, or list, yields an empty
    mapping (no overrides).

    Args:
        v: Raw value supplied for ``plugins_default`` before type coercion.

    Returns:
        dict[str, bool]: Mapping of known plugin names to enabled flags.

    Raises:
        ValueError: If the string is not valid JSON, the decoded value is
            not a JSON object, a key is not in ``KNOWN_PLUGINS``, or a value
            is not a boolean.
    """
    if isinstance(v, dict):
        parsed = v
    elif isinstance(v, str):
        if not v.strip():
            # Empty/whitespace-only value means "no overrides configured".
            return {}
        try:
            parsed = json.loads(v)
        except json.JSONDecodeError as exc:
            # Chain explicitly so the decode error (position, message) is
            # reported as the direct cause of the validation failure.
            raise ValueError("PLUGINS_DEFAULT must be a valid JSON string") from exc
        if not isinstance(parsed, dict):
            raise ValueError("PLUGINS_DEFAULT must be a JSON object")
    elif isinstance(v, list):
        raise ValueError("PLUGINS_DEFAULT must be a JSON object, got array")
    else:
        # Any other input type falls back to an empty mapping.
        return {}

    for key, val in parsed.items():
        if key not in KNOWN_PLUGINS:
            raise ValueError(f"Unknown plugin key in PLUGINS_DEFAULT: '{key}'")
        # Strict bool check: truthy values such as 1 or "true" are rejected.
        if not isinstance(val, bool):
            raise ValueError(
                f"Plugin '{key}' in PLUGINS_DEFAULT must be a boolean, "
                f"got {type(val).__name__}"
            )
    return parsed

def model_post_init(self, __context):
"""Validate and log non-sensitive configuration values after initialization."""
if self.group_id >= 0:
Expand All @@ -115,7 +150,7 @@ def model_post_init(self, __context):
env = os.getenv("BOT_ENV", "production")
if self.logfire_environment == "production" and env == "staging":
self.logfire_environment = "staging"

logger.info("Configuration loaded successfully")
logger.debug(f"group_id: {self.group_id}")
logger.debug(f"warning_topic_id: {self.warning_topic_id}")
Expand All @@ -127,9 +162,13 @@ def model_post_init(self, __context):
logger.debug(f"captcha_timeout_seconds: {self.captcha_timeout_seconds}")
logger.debug(f"new_user_probation_hours: {self.new_user_probation_hours}")
logger.debug(f"new_user_violation_threshold: {self.new_user_violation_threshold}")
logger.debug(f"bio_bait_enabled: {self.bio_bait_enabled}")
logger.debug(f"bio_bait_monitor_only: {self.bio_bait_monitor_only}")
logger.debug(f"bio_bait_alert_chat_id: {self.bio_bait_alert_chat_id}")
logger.debug(f"telegram_bot_token: {'***' + self.telegram_bot_token[-4:]}") # Mask sensitive token
logger.debug(f"logfire_enabled: {self.logfire_enabled}")
logger.debug(f"logfire_environment: {self.logfire_environment}")
logger.debug(f"plugins_default: {self.plugins_default}")

@property
def probation_timedelta(self) -> timedelta:
Expand All @@ -143,7 +182,6 @@ def warning_time_threshold_timedelta(self) -> timedelta:
def captcha_timeout_timedelta(self) -> timedelta:
return timedelta(seconds=self.captcha_timeout_seconds)


@lru_cache
def get_settings() -> Settings:
"""
Expand All @@ -155,4 +193,4 @@ def get_settings() -> Settings:
Returns:
Settings: Application configuration instance.
"""
return Settings()
return Settings()
45 changes: 45 additions & 0 deletions src/bot/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,51 @@ def format_hours_display(hours: int) -> str:
"📌 [Peraturan Grup]({rules_link})"
)

# Bio bait spam notification (e.g. "cek bio aku" / "lihat byoh").
# Indonesian-language text telling the group that a message inviting people to
# check the sender's bio/profile was deleted and that the user was restricted.
# Placeholders: {user_mention}, {rules_link}.
# NOTE(review): the *bold* and [text](url) markup suggests this is sent with a
# Markdown parse mode - confirm at the call site.
BIO_BAIT_SPAM_NOTIFICATION = (
"🚫 *Spam Bio Bait Terdeteksi*\n\n"
"Pesan dari {user_mention} telah dihapus karena berisi ajakan "
"untuk mengecek bio/profil, pola yang umum dipakai untuk spam/promosi/scam.\n\n"
"Pengguna telah dibatasi.\n\n"
"📌 [Peraturan Grup]({rules_link})"
)

# Same text as BIO_BAIT_SPAM_NOTIFICATION without the "Pengguna telah dibatasi."
# (user has been restricted) line. Placeholders: {user_mention}, {rules_link}.
BIO_BAIT_SPAM_NOTIFICATION_NO_RESTRICT = (
"🚫 *Spam Bio Bait Terdeteksi*\n\n"
"Pesan dari {user_mention} telah dihapus karena berisi ajakan "
"untuk mengecek bio/profil, pola yang umum dipakai untuk spam/promosi/scam.\n\n"
"📌 [Peraturan Grup]({rules_link})"
)

# Bio profile link spam (user's profile bio contains promo/scam links).
# States that the message was deleted because the account's profile bio has
# suspicious Telegram links/mentions, and that the user was restricted.
# Placeholders: {user_mention}, {rules_link}.
BIO_LINK_SPAM_NOTIFICATION = (
"🚫 *Spam Bio Profil Terdeteksi*\n\n"
"Pesan dari {user_mention} telah dihapus karena akun ini memiliki "
"bio profil dengan tautan/mention Telegram mencurigakan.\n\n"
"Pengguna telah dibatasi.\n\n"
"📌 [Peraturan Grup]({rules_link})"
)

# Same text as BIO_LINK_SPAM_NOTIFICATION without the "Pengguna telah dibatasi."
# (user has been restricted) line. Placeholders: {user_mention}, {rules_link}.
BIO_LINK_SPAM_NOTIFICATION_NO_RESTRICT = (
"🚫 *Spam Bio Profil Terdeteksi*\n\n"
"Pesan dari {user_mention} telah dihapus karena akun ini memiliki "
"bio profil dengan tautan/mention Telegram mencurigakan.\n\n"
"📌 [Peraturan Grup]({rules_link})"
)

# Monitor-only alert for owner/admin chat when bio bait match is detected.
# Sent without parse_mode to preserve raw message/bio content for forensic review.
# Placeholders: {reason}, {group_id}, {user_id}, {user_name}, {username},
# {message_text}, {profile_bio}.
BIO_BAIT_MONITOR_ALERT = (
"[BIO BAIT MONITOR]\n"
"Reason: {reason}\n"
"Group ID: {group_id}\n"
"User ID: {user_id}\n"
"User: {user_name}\n"
"Username: {username}\n"
"Message:\n{message_text}\n\n"
"Profile Bio:\n{profile_bio}"
)

# Whitelisted URL domains for new user probation
# These domains are allowed even during probation period
# Matches exact domain or subdomains (e.g., "github.com" matches "www.github.com")
Expand Down
26 changes: 25 additions & 1 deletion src/bot/group_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from pydantic import BaseModel, field_validator
from telegram import Update

from bot.plugins.definitions import PLUGIN_NAMES as KNOWN_PLUGINS

logger = logging.getLogger(__name__)


Expand All @@ -41,6 +43,10 @@ class GroupConfig(BaseModel):
duplicate_spam_threshold: int = 2
duplicate_spam_min_length: int = 20
duplicate_spam_similarity: float = 0.95
bio_bait_enabled: bool = True
bio_bait_monitor_only: bool = False
bio_bait_alert_chat_id: int | None = None
plugins: dict[str, bool] | None = None

@field_validator("group_id")
@classmethod
Expand Down Expand Up @@ -77,6 +83,20 @@ def probation_hours_must_be_non_negative(cls, v: int) -> int:
raise ValueError("new_user_probation_hours must be >= 0")
return v

@field_validator("plugins", mode="before")
@classmethod
def validate_plugins(cls, v: object) -> dict[str, bool] | None:
    """
    Validate the optional per-group plugin toggle map.

    ``None`` is passed through unchanged. Otherwise the value must be a dict
    whose keys are all in ``KNOWN_PLUGINS`` and whose values are real
    booleans.

    Raises:
        ValueError: If the value is not a dict, a key is unknown, or a value
            is not a boolean.
    """
    if v is not None:
        if not isinstance(v, dict):
            raise ValueError("plugins must be a dict or None")
        for key, val in v.items():
            # Check the name first so an unknown plugin is reported before
            # any complaint about its value.
            if key not in KNOWN_PLUGINS:
                raise ValueError(f"Unknown plugin key: '{key}'")
            if not isinstance(val, bool):
                raise ValueError(f"Plugin '{key}' value must be a boolean, got {type(val).__name__}")
    return v

@property
def probation_timedelta(self) -> timedelta:
return timedelta(hours=self.new_user_probation_hours)
Expand Down Expand Up @@ -193,6 +213,10 @@ def build_group_registry(settings: object) -> GroupRegistry:
duplicate_spam_threshold=settings.duplicate_spam_threshold,
duplicate_spam_min_length=settings.duplicate_spam_min_length,
duplicate_spam_similarity=settings.duplicate_spam_similarity,
bio_bait_enabled=getattr(settings, "bio_bait_enabled", True),
bio_bait_monitor_only=getattr(settings, "bio_bait_monitor_only", False),
bio_bait_alert_chat_id=getattr(settings, "bio_bait_alert_chat_id", None),
plugins=getattr(settings, "plugins_default", None),
)
registry.register(config)

Expand Down Expand Up @@ -259,4 +283,4 @@ def get_group_registry() -> GroupRegistry:
def reset_group_registry() -> None:
"""Reset the group registry singleton (for testing)."""
global _registry
_registry = None
_registry = None
Loading
Loading