diff --git a/.gitignore b/.gitignore index 4dd14fa9c..1be3ffae2 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,9 @@ src/tests/unit/data/sync.json src/tests/integration/cache/ + +src/src/AI/webserver/router.local.php + +src/src/AI/webserver/router.worker.local.php + +src/src/AI/webserver/router.listener.local.php diff --git a/Makefile b/Makefile index 162115d64..be4e2241c 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ define execPhpAlpine --workdir "$(2:=/)" \ --add-host=host.docker.internal:host-gateway \ qit-cli-tests:$(PHP_VERSION) \ - bash -c "php $(XDEBUG_FLAGS) $(1)" + bash -c "php -d memory_limit=2G $(XDEBUG_FLAGS) $(1)" endef watch: diff --git a/src/.phan/config.php b/src/.phan/config.php index 2a41c1ca0..4ed027cee 100644 --- a/src/.phan/config.php +++ b/src/.phan/config.php @@ -290,6 +290,8 @@ // should be added to the `directory_list` as well as // to `exclude_analysis_directory_list`. 'exclude_analysis_directory_list' => [ + 'src/AI', + 'src/Commands/AI/NodeStartCommand.php', 'vendor/', ], diff --git a/src/.phpcs.xml.dist b/src/.phpcs.xml.dist index 837069175..00dc2cf19 100644 --- a/src/.phpcs.xml.dist +++ b/src/.phpcs.xml.dist @@ -6,6 +6,9 @@ + + dev/* + src/src/AI/* @@ -59,6 +62,7 @@ + @@ -71,6 +75,10 @@ + + + + diff --git a/src/composer.json b/src/composer.json index 68dbfdd5a..2608caaa8 100644 --- a/src/composer.json +++ b/src/composer.json @@ -32,7 +32,8 @@ "composer/ca-bundle": "^1.4", "symfony/serializer": "^5", "symfony/yaml": "^5", - "vlucas/phpdotenv": "^5" + "vlucas/phpdotenv": "^5", + "psr/container": "^1.1" }, "require-dev": { "phpunit/phpunit": "^8", diff --git a/src/composer.lock b/src/composer.lock index 07186f38d..3c53cb7f4 100644 --- a/src/composer.lock +++ b/src/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "d61a22322201d8ecca93cd8abc525604", + "content-hash": 
"bd77c28684a1a0c9a9e2ff853b7d023f", "packages": [ { "name": "composer/ca-bundle", diff --git a/src/phpstan.neon b/src/phpstan.neon index db46728cd..3ff3ff36d 100644 --- a/src/phpstan.neon +++ b/src/phpstan.neon @@ -5,6 +5,13 @@ parameters: - /app/src/qit-cli.php scanDirectories: - /app/src/vendor + - /app/src/src/AI/dev + excludePaths: + analyse: + - /app/src/src/Commands/AI + - /app/src/src/AI + analyseAndScan: + - /app/src/*/dev/* tmpDir: /app/.cache parallel: jobSize: 10 diff --git a/src/src/AI/WebServer.php b/src/src/AI/WebServer.php new file mode 100644 index 000000000..ac93b5bf5 --- /dev/null +++ b/src/src/AI/WebServer.php @@ -0,0 +1,426 @@ + */ + private array $provider_config = []; + /** @var array */ + private array $runtime_config = []; + private string $router_template; + private bool $bind_localhost_only = false; + private ?string $custom_log_file = null; + /** @var array */ + private array $environment_variables = []; + + public function __construct( bool $use_local_mode = true ) { + $this->use_local_mode = $use_local_mode; + } + + public static function is_ai_enabled(): bool { + return false; + } + + /** + * Set the logger instance. + * + * @param \QIT_CLI\Logging\Logger $logger The logger instance. + */ + public function set_logger( \QIT_CLI\Logging\Logger $logger ): void { + $this->logger = $logger; + $this->custom_log_file = $logger->get_log_file(); + } + + /** + * Set a node token to use instead of generating a random one. + * + * @param string $t The token to use. + */ + public function set_node_token( string $t ): void { + $this->node_token = $t; + } + + /** + * @param array $config + */ + public function set_provider_config( string $provider, array $config ): void { + $this->provider = $provider; + $this->provider_config = $config; + } + + /** + * Set the runtime configuration. + * + * @param array $config The runtime configuration. 
/**
 * Start the PHP built-in web server as a background process.
 *
 * Validates the required configuration, reserves a free TCP port, prepares
 * the webroot (the bundled `webserver` directory in local mode, a freshly
 * copied temp directory otherwise) and launches `php -S` with a restricted
 * open_basedir and the QIT_* environment variables for the router script.
 *
 * @return string Base URL of the started server ("http://localhost:<port>").
 *
 * @throws \RuntimeException When required configuration is missing, the
 *                           webserver source directory does not exist, the
 *                           provider configuration cannot be JSON-encoded, or
 *                           the process is not running after the grace period.
 */
public function start(): string {
	// 1. Validate the caller contract.
	// `??` is used on the properties because a typed property without a default
	// (such as $router_template) raises an Error when read before it was set;
	// that would bypass the descriptive exception below.
	$required = [
		'nodeToken'      => $this->node_token ?? null,
		'routerTemplate' => $this->router_template ?? null,
		'ai_dir'         => $this->runtime_config['ai_dir'] ?? null,
		'tmp_base'       => $this->runtime_config['tmp_base'] ?? null,
	];

	$missing = array_keys(
		array_filter( $required, static fn( $v ) => $v === null || $v === '' )
	);

	if ( $missing ) {
		throw new \RuntimeException(
			'WebServer mis‑configuration: missing ' . implode( ', ', $missing )
		);
	}

	// 2. From here on every required value is guaranteed to be present.
	$mode = $this->use_local_mode ? 'local' : 'temp';

	if ( $this->logger ) {
		$this->logger->info( 'Starting webserver', [ 'mode' => $mode ] );
	}

	// Find an available port.
	$this->port = $this->find_available_port();
	if ( $this->logger ) {
		$this->logger->debug( 'Found available port', [ 'port' => $this->port ] );
		// Only a short prefix of the token is logged, never the full secret.
		$this->logger->debug( 'Using provided node token', [
			'token_prefix' => substr( $this->node_token, 0, 8 ) . '...',
		] );
	}

	if ( $this->use_local_mode ) {
		// Serve the source webserver directory directly.
		$this->webroot = __DIR__ . '/webserver';
		if ( ! is_dir( $this->webroot ) ) {
			$error_msg = 'Webserver source directory not found: ' . $this->webroot;
			if ( $this->logger ) {
				$this->logger->error( $error_msg );
			}
			throw new \RuntimeException( $error_msg );
		}
		if ( $this->logger ) {
			$this->logger->info( 'Using local webserver directory', [ 'webroot' => $this->webroot ] );
		}
	} else {
		// Create a temp directory and copy the webserver files into it.
		$this->setup_temp_webroot();
	}

	// The router template is used as-is; configuration reaches it through
	// environment variables rather than placeholder replacement.
	$router_path = $this->webroot . '/' . $this->router_template;

	// Restrict filesystem access of the server process via open_basedir.
	// The trailing slash makes PHP treat each entry as a directory rather
	// than a path prefix.
	$allowed = [
		$this->runtime_config['tmp_base'] . '/',
		$this->runtime_config['ai_dir'] . '/',
	];

	if ( $this->use_local_mode ) {
		// Local mode serves files straight from this source directory.
		$allowed[] = rtrim( __DIR__, '/' ) . '/';
	}

	$open_basedir = implode( PATH_SEPARATOR, $allowed );

	// Bind to loopback only when requested, otherwise to all interfaces.
	$host = $this->bind_localhost_only ? "127.0.0.1:{$this->port}" : "0.0.0.0:{$this->port}";

	if ( $this->logger ) {
		$this->logger->info( 'Starting PHP built-in server', [
			'host'           => $host,
			'webroot'        => $this->webroot,
			'router'         => $router_path,
			'mode'           => $mode,
			'open_basedir'   => $open_basedir,
			'localhost_only' => $this->bind_localhost_only,
		] );
	}

	// Fail loudly instead of passing `false` as an environment value.
	$provider_cfg = json_encode( $this->provider_config );
	if ( false === $provider_cfg ) {
		$error_msg = 'Failed to encode provider configuration: ' . json_last_error_msg();
		if ( $this->logger ) {
			$this->logger->error( $error_msg );
		}
		throw new \RuntimeException( $error_msg );
	}

	// Everything the router script needs to know.
	$env = [
		'QIT_NODE_TOKEN'   => $this->node_token,
		'QIT_NODE_DIR'     => $this->runtime_config['tmp_base'],
		'QIT_AI_DIR'       => $this->runtime_config['ai_dir'],
		'QIT_PROVIDER'     => $this->provider,
		'QIT_PROVIDER_CFG' => $provider_cfg,
	];

	// The logger is optional everywhere else in this method, so it must not
	// be dereferenced unconditionally here.
	if ( $this->logger ) {
		$env['QIT_LOG_FILE'] = $this->logger->get_log_file();
	} elseif ( null !== $this->custom_log_file ) {
		$env['QIT_LOG_FILE'] = $this->custom_log_file;
	}

	// Custom variables are merged last so they can override the defaults.
	if ( ! empty( $this->environment_variables ) ) {
		$env = array_merge( $env, $this->environment_variables );
		if ( $this->logger ) {
			$this->logger->debug('Added custom environment variables', [
				'variables' => array_keys( $this->environment_variables ),
			]);
		}
	}

	$this->process = new Process(
		[
			'php',
			'-d',
			'open_basedir=' . $open_basedir,
			'-d',
			'variables_order=EGPCS',
			'-S',
			$host,
			'-t',
			$this->webroot,
			$router_path,
		],
		null, // cwd
		$env
	);

	$this->process->start();

	// Give the server a moment to come up before checking on it.
	if ( $this->logger ) {
		$this->logger->debug( 'Waiting for server to start' );
	}
	usleep( 500000 ); // 0.5 seconds

	if ( ! $this->process->isRunning() ) {
		$error_output = $this->process->getErrorOutput();
		$error_msg    = 'Failed to start web server: ' . $error_output;
		if ( $this->logger ) {
			$this->logger->error( $error_msg, [
				'error_output' => $error_output,
			] );
		}
		throw new \RuntimeException( $error_msg );
	}

	$server_url = "http://localhost:{$this->port}";
	if ( $this->logger ) {
		$this->logger->info( 'Webserver started successfully', [ 'url' => $server_url ] );
	}

	return $server_url;
}
/**
 * Recursively copy the contents of one directory into another.
 *
 * Directories are created with mode 0700, matching the permissions the rest
 * of this class uses for the temp webroot. Copied `.php` files are chmod'ed
 * to 0755. Any directory or file that cannot be created aborts the copy, so
 * the server is never started from a partially populated webroot.
 *
 * @param string $source Directory to copy from.
 * @param string $dest   Directory to copy into; created if missing.
 *
 * @throws \RuntimeException When a directory cannot be created or a file cannot be copied.
 */
private function recursive_copy( string $source, string $dest ): void {
	// The second is_dir() covers the race where another process creates the
	// directory between the first check and mkdir().
	$ensure_dir = static function ( string $dir ): void {
		if ( ! is_dir( $dir ) && ! mkdir( $dir, 0700, true ) && ! is_dir( $dir ) ) {
			throw new \RuntimeException( 'Failed to create directory: ' . $dir );
		}
	};

	$ensure_dir( $dest );

	// SELF_FIRST yields each directory before its children, so a parent
	// always exists by the time its files are copied.
	$iterator = new \RecursiveIteratorIterator(
		new \RecursiveDirectoryIterator( $source, \RecursiveDirectoryIterator::SKIP_DOTS ),
		\RecursiveIteratorIterator::SELF_FIRST
	);

	foreach ( $iterator as $item ) {
		$target = $dest . '/' . $iterator->getSubPathName();

		if ( $item->isDir() ) {
			$ensure_dir( $target );
			continue;
		}

		if ( ! copy( $item->getPathname(), $target ) ) {
			throw new \RuntimeException( 'Failed to copy file: ' . $item->getPathname() );
		}

		// Make PHP files executable.
		if ( pathinfo( $target, PATHINFO_EXTENSION ) === 'php' ) {
			chmod( $target, 0755 );
		}
	}
}
/**
 * Get performance statistics for the current request.
 *
 * Always reports `total_duration_ms` (null when init() was never called),
 * `timestamp` and `memory_peak_mb`. When markers were recorded, a `markers`
 * map keyed by marker name is added; each entry holds the time since the
 * previous marker (`duration_ms`), the time since the baseline
 * (`cumulative_ms`) and the marker's `data` when it is non-empty.
 *
 * @return array Performance data
 */
public static function get_stats(): array {
	$end_time = microtime( true );

	// Normalize an unset start time (null, or a zero default) to null.
	$start_time = self::$request_start_time ? self::$request_start_time : null;
	$total_time = null !== $start_time ? ( $end_time - $start_time ) * 1000 : null;

	$stats = [
		// Explicit null check so a genuine 0.0 duration is not reported as null.
		'total_duration_ms' => null !== $total_time ? round( $total_time, 2 ) : null,
		'timestamp'         => time(),
		'memory_peak_mb'    => round( memory_get_peak_usage( true ) / 1048576, 2 ),
	];

	if ( ! empty( self::$performance_markers ) ) {
		$markers = [];

		// Without a request start time, measure relative to the first marker
		// instead of subtracting from null, which would yield epoch-sized values.
		$baseline  = $start_time;
		$last_time = $start_time;

		foreach ( self::$performance_markers as $marker ) {
			if ( null === $baseline ) {
				$baseline  = $marker['time'];
				$last_time = $marker['time'];
			}

			// A later marker with the same name replaces the earlier entry.
			$markers[ $marker['name'] ] = [
				'duration_ms'   => round( ( $marker['time'] - $last_time ) * 1000, 2 ),
				'cumulative_ms' => round( ( $marker['time'] - $baseline ) * 1000, 2 ),
			];

			if ( ! empty( $marker['data'] ) ) {
				$markers[ $marker['name'] ]['data'] = $marker['data'];
			}

			$last_time = $marker['time'];
		}

		$stats['markers'] = $markers;
	}

	return $stats;
}
/**
 * Merge the current performance statistics into a response's `meta` entry.
 *
 * Existing `meta` keys are kept unless get_stats() returns a key with the
 * same name, in which case the statistic wins.
 *
 * @param array $response Response array to enhance with metrics.
 * @return array Enhanced response
 */
public static function enhance_response( array $response ): array {
	// A missing (or null) meta entry starts out as an empty array.
	$existing_meta = $response['meta'] ?? [];

	$response['meta'] = array_merge( $existing_meta, self::get_stats() );

	return $response;
}
/**
 * Resolve the tool calls of a chat answer against the registered tools.
 *
 * Each requested call whose function name matches a registered tool yields
 * a clone of that tool carrying the call id and the raw JSON arguments.
 * Names without a match are collected and exposed via getUnknownTools();
 * that list is reset on every invocation.
 *
 * @return array Tools to call, in the order they were requested.
 */
protected function getToolsToCall( CreateResponse $answer ): array {
	$this->unknown_tools = [];
	$callable_tools      = [];

	foreach ( $answer->choices[0]->message->toolCalls as $tool_call ) {
		$requested_name = $tool_call->function->name;

		// Look up the first registered tool with the requested name.
		$matched_tool = null;
		foreach ( $this->tools as $registered_tool ) {
			if ( $registered_tool->name === $requested_name ) {
				$matched_tool = $registered_tool;
				break;
			}
		}

		if ( null === $matched_tool ) {
			$this->unknown_tools[] = $requested_name;
			continue;
		}

		$prepared = $matched_tool->cloneWithId( $tool_call->id );
		// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
		$prepared->jsonArgs = $tool_call->function->arguments;
		$callable_tools[]   = $prepared;
	}

	return $callable_tools;
}
+ * @return string JSON response + */ + abstract public function handle( array $input ); + + /** + * Handle errors consistently across all endpoints + * + * @param Exception $e Exception to handle. + * @param array $context Additional context for error reporting. + * @return string JSON error response + */ + protected function handle_error( Exception $e, array $context = [] ): string { + $trace = $e->getTraceAsString(); + + $error_context = array_merge( [ + 'exception' => get_class( $e ), + 'trace' => $trace, + ], $context ); + + $this->log_error( 'Processing error: ' . $e->getMessage(), $error_context ); + + // Report error back to manager + $error_report = [ + 'job_id' => $context['job_id'] ?? null, + 'error_type' => get_class( $e ), + 'error_message' => $e->getMessage(), + 'error_time' => gmdate( 'Y-m-d H:i:s' ), + 'job_type' => $context['job_type'] ?? 'unknown', + ]; + + $this->log_info( 'Storing error for next heartbeat', [ + 'job_id' => $context['job_id'] ?? 'unknown', + 'error_type' => get_class( $e ), + ] ); + + // Store error for next heartbeat + $error_dir = rtrim( sys_get_temp_dir(), '/\\' ) . '/qit-node/errors'; + if ( ! is_dir( $error_dir ) ) { + mkdir( $error_dir, 0700, true ); + } + file_put_contents( + $error_dir . 
'/qit-node-last-error.json', + json_encode( $error_report ) + ); + + // Use NodeResponse::error for standardized error response + // Get JSON response as string and echo it + return json_encode( NodeResponse::error( $e->getMessage(), 500, $error_report ) ); + } + + /** + * Logging methods - these would use the global logging functions + */ + public function log_info( string $message, array $context = [] ): void { + log_info( $message, $context ); + } + + /** + * @param string $message + * @param array $context + */ + public function log_debug( string $message, array $context = [] ): void { + log_debug( $message, $context ); + } + + /** + * @param string $message + * @param array $context + */ + public function log_error( string $message, array $context = [] ): void { + log_error( $message, $context ); + } + + /** + * @param string $message + * @param array $context + */ + public function log_warning( string $message, array $context = [] ): void { + log_warning( $message, $context ); + } +} diff --git a/src/src/AI/webserver/Endpoints/BasicPromptEndpoint.php b/src/src/AI/webserver/Endpoints/BasicPromptEndpoint.php new file mode 100644 index 000000000..2aa0708f9 --- /dev/null +++ b/src/src/AI/webserver/Endpoints/BasicPromptEndpoint.php @@ -0,0 +1,120 @@ + $input Request input data. + * + * @return string JSON response + */ + public function handle( array $input ): string { + $this->log_info( 'Processing basic AI request' ); + + try { + // Get model and provider info from bootstrapped LLPhant integration + $model = \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(); + $provider = \QIT_AI_Webserver\Lib\LLPhantBootstrap::getCurrentProvider(); + + $messages = []; + foreach ( $input['messages'] as $m ) { + $messages[] = \LLPhant\Chat\Message::{$m['role']}( $m['content'] ); + } + + if ( ! empty( $input['response_format'] ) ) { + // Forward the desired response format directly to the AI provider. 
+ $this->chat->setModelOption( 'response_format', $input['response_format'] ); + } + + $this->log_info( 'Starting AI processing', [ + 'job_id' => $input['job_id'] ?? 'unknown', + 'message_count' => count( $messages ), + 'has_schema' => isset( $input['response_format'] ) ? 'yes' : 'no', + 'has_options' => isset( $input['options'] ) ? 'yes' : 'no', + ] ); + + // Make the API call using chat with additional error handling + NodeResponse::mark( 'llm_call' ); + $start = microtime( true ); + + try { + $result = $this->chat->generateChat( $messages ); + } catch ( \TypeError $e ) { + // Check if the model exists. + throw $e; + } + + $elapsed = microtime( true ) - $start; + + $response = [ + 'response' => trim( (string) $result ), + 'duration' => $elapsed, + 'model' => $model, + 'provider' => $provider, + ]; + + // Log performance metrics + $this->log_info( 'AI processing completed successfully', [ + 'job_id' => $input['job_id'] ?? 'unknown', + 'model' => $response['model'], + 'provider' => $response['provider'], + 'duration' => $response['duration'] ?? 0, + 'response_length' => strlen( $response['response'] ), + ] ); + + // Log response structure before formatting + $this->log_info( 'Response structure before formatting', [ + 'job_id' => $input['job_id'] ?? 'unknown', + 'response_type' => gettype( $response['response'] ), + 'response_starts' => substr( $response['response'], 0, 50 ) . '...', + 'has_json_schema' => isset( $input['response_format'] ) && isset( $input['response_format']['type'] ) && $input['response_format']['type'] === 'json_schema', + ] ); + + // Use NodeResponse::prompt for standardized response + // Get JSON response as string and echo it + $formatted_response = json_encode( NodeResponse::prompt( + trim( $response['response'] ), + $response['model'], + $response, // Pass full response for stats + [ 'job_id' => $input['job_id'] ?? 
/**
 * Read a single file below the resolved extract path and return it as JSON.
 *
 * The `file` input is validated (non-empty string, no `..` sequences, no
 * null bytes), the extract path is resolved through ExtractPathResolver, and
 * the file is read with the `read_file` tool. The success payload contains
 * the raw content plus a copy with 1-based, right-aligned line numbers.
 *
 * HTTP status: 400 for invalid input or a failed path resolution, 404 when
 * the tool reports a failure, 500 for unexpected exceptions or when the
 * response cannot be encoded.
 *
 * @param array $input Request input data; `file` is required.
 *
 * @return string JSON response
 */
public function handle( array $input ): string {
	$this->log_info( 'Starting file reading endpoint', [
		'input_keys'       => array_keys( $input ),
		'has_file'         => isset( $input['file'] ),
		'has_extract_path' => isset( $input['extract_path'] ),
	] );

	// Access parameters directly from input (consistent with Actions).
	// Requiring a string here keeps the strpos() checks below from raising a
	// TypeError, which `catch ( Exception )` would not intercept. Comparing
	// against '' rather than using empty() lets a file literally named "0" through.
	if ( ! isset( $input['file'] ) || ! is_string( $input['file'] ) || '' === $input['file'] ) {
		$this->log_error( 'No file provided for reading' );
		http_response_code( 400 );
		return json_encode( NodeResponse::error( 'Missing file parameter' ) );
	}

	$file_path = $input['file'];

	// Use centralized path resolution.
	try {
		$extract_path = ExtractPathResolver::resolve( $input );
		$this->log_info( 'Extract path resolved for file reading', [ 'extract_path' => $extract_path ] );
	} catch ( Exception $e ) {
		$this->log_error( 'Path resolution failed for file reading', [
			'error'       => $e->getMessage(),
			'file'        => $file_path,
			'diagnostics' => ExtractPathResolver::get_diagnostic_message( $input ),
		] );
		http_response_code( 400 );
		return json_encode( NodeResponse::error( $e->getMessage() ) );
	}

	// SECURITY: Prevent directory traversal attacks.
	if ( strpos( $file_path, '..' ) !== false ) {
		$this->log_error( 'Directory traversal attempt detected in file', [
			'file' => $file_path,
		] );
		http_response_code( 400 );
		return json_encode( NodeResponse::error( 'Directory traversal sequences (..) are not allowed in file.' ) );
	}

	// SECURITY: Reject any path containing null bytes.
	if ( strpos( $file_path, "\0" ) !== false ) {
		$this->log_error( 'Null byte injection attempt detected in file', [
			'file' => $file_path,
		] );
		http_response_code( 400 );
		return json_encode( NodeResponse::error( 'Null bytes are not allowed in file.' ) );
	}

	try {
		$this->log_info( 'Reading file content', [
			'file'         => $file_path,
			'extract_path' => $extract_path,
		] );

		// Initialize ToolRegistry with the extract path as work directory.
		$registry = new ToolRegistry( $extract_path );

		// Use the read_file tool to read the file content.
		$result = $registry->execute_tool( 'read_file', [
			'file' => $file_path,
		] );

		if ( ! $result['success'] ) {
			$tool_error = $result['error'] ?? 'unknown error';
			$this->log_error( 'Failed to read file', [
				'file'  => $file_path,
				'error' => $tool_error,
			] );

			http_response_code( 404 );
			return json_encode( NodeResponse::error( 'File reading failed: ' . $tool_error ) );
		}

		$content = $result['data']['content'] ?? '';
		$lines   = $result['data']['total_lines'] ?? 0;

		// Build the numbered view: human-friendly 1-based index, 6 characters
		// wide. The cast keeps str_pad() valid under strict typing.
		$numbered = [];
		foreach ( explode( "\n", $content ) as $idx => $line ) {
			$numbered[] = str_pad( (string) ( $idx + 1 ), 6, ' ', STR_PAD_LEFT ) . '│ ' . $line;
		}

		$this->log_info( 'File read successfully', [
			'file'         => $file_path,
			'content_size' => strlen( $content ),
			'total_lines'  => $lines,
		] );

		// File contents are not guaranteed to be valid UTF-8; substitute bad
		// sequences instead of letting json_encode() fail and return false.
		$payload = json_encode(
			NodeResponse::success( [
				'file_content'              => $content,
				'file_lines'                => $lines,
				'file_size'                 => strlen( $content ),
				'content_with_line_numbers' => implode( "\n", $numbered ),
				'file'                      => $file_path,
				'extract_path'              => $extract_path,
			], 'file_reading' ),
			JSON_INVALID_UTF8_SUBSTITUTE
		);

		if ( false === $payload ) {
			$encode_error = json_last_error_msg();
			$this->log_error( 'File reading failed: ' . $encode_error, [
				'file'         => $file_path,
				'extract_path' => $extract_path,
			] );

			http_response_code( 500 );
			return (string) json_encode( NodeResponse::error( 'File reading failed', 500, [ 'message' => $encode_error ] ) );
		}

		return $payload;

	} catch ( Exception $e ) {
		$this->log_error( 'File reading failed: ' . $e->getMessage(), [
			'file'         => $file_path,
			'extract_path' => $extract_path,
		] );

		// Keep the HTTP status in line with the 500 carried in the body, as
		// the 400/404 paths above already do.
		http_response_code( 500 );
		return (string) json_encode(
			NodeResponse::error( 'File reading failed', 500, [ 'message' => $e->getMessage() ] ),
			JSON_INVALID_UTF8_SUBSTITUTE
		);
	}
}
/**
 * Entry point for the /vulnerability-scan route: run the four-phase WooCommerce scan on one file.
 *
 * Validates the payload, builds the tool registry for the extracted
 * workspace, caches the first 32,000 characters of the file under test, and
 * then runs candidate generation, candidate selection and the
 * investigation/verdict loop. A human-readable transcript is accumulated in
 * $this->global_runtime_debug throughout.
 *
 * @param array $input Request payload. Required: `files` (a list holding exactly
 *                     one entry with a `path`), `plugin_meta`, `response_format`
 *                     and `extract_path`. Optional: `sut_relative_dir`, `deps`,
 *                     `context`, `static_analysis_results`.
 * @return string|false Result of json_encode() on a NodeResponse payload.
 */
public function handle( array $input ) {
	// Initialize global debug
	$this->global_runtime_debug  = "════════════════════════════════════════════════════════════════\n";
	$this->global_runtime_debug .= "WOOCOMMERCE VULNERABILITY SCAN - REQUEST STARTED\n";
	$this->global_runtime_debug .= 'Time: ' . gmdate( 'Y-m-d H:i:s' ) . "\n";
	$this->global_runtime_debug .= "════════════════════════════════════════════════════════════════\n\n";

	// Log input
	$this->global_runtime_debug .= "【INPUT RECEIVED】\n";
	$this->global_runtime_debug .= json_encode( $input, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES ) . "\n";
	$this->global_runtime_debug .= "【/INPUT RECEIVED】\n\n";

	// Validate input. `extract_path` is dereferenced unconditionally below, so it is required too.
	foreach ( [ 'files', 'plugin_meta', 'response_format', 'extract_path' ] as $k ) {
		if ( ! isset( $input[ $k ] ) ) {
			$this->global_runtime_debug .= "ERROR: Missing required field: {$k}\n";

			return json_encode( NodeResponse::error( "Missing {$k}", 400 ) );
		}
	}

	// Validate single file (a non-array `files` value counts as zero files instead of crashing count()).
	$file_count = is_array( $input['files'] ) ? count( $input['files'] ) : 0;
	if ( $file_count !== 1 ) {
		$this->global_runtime_debug .= 'ERROR: Expected exactly 1 file, got ' . $file_count . "\n";

		return json_encode( NodeResponse::error( 'Exactly one file required, got ' . $file_count, 400 ) );
	}

	// Later phases read $input['files'][0]['path'] directly, so insist on that exact shape up front.
	$file_path = $input['files'][0]['path'] ?? null;
	if ( ! is_string( $file_path ) || $file_path === '' ) {
		$this->global_runtime_debug .= "ERROR: Missing required field: files[0].path\n";

		return json_encode( NodeResponse::error( 'Missing files[0].path', 400 ) );
	}

	$model                       = \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel();
	$this->global_runtime_debug .= "Model: {$model}\n";

	$this->root     = $input['extract_path'];
	$this->sut_dir  = rtrim( $input['sut_relative_dir'] ?? '', '/' );
	$deps           = $input['deps'] ?? [];
	$tool_context   = new \QIT_AI_Webserver\ToolContext( $this->root, $this->sut_dir, $deps );
	$this->registry = new ToolRegistry( $tool_context );
	// NOTE(review): the guard instance is discarded; kept as-is in case its constructor has side effects — confirm.
	new ToolPathGuard( $this->root, $this->sut_dir );

	// Extract WooCommerce-specific metadata
	$this->marketplace_slug = $input['plugin_meta']['slug'] ?? null;
	$this->detectWooCommerceVersion();

	// Populate availableTools after registry is created
	$this->available_tools = [];
	foreach ( $this->registry->getTools() as $tool ) {
		$func_info               = $tool->getFunctionInfo();
		$this->available_tools[] = [
			'name'        => $func_info->name,
			'description' => $func_info->description ?? '',
		];
	}

	// Set context if available in input
	if ( isset( $input['context'] ) ) {
		$this->context = (object) $input['context'];
	}

	$this->static_analysis_results = $this->normalizeStatic( $input['static_analysis_results'] ?? [] );

	// Cache clipped source code once.
	// NOTE(review): $file_path comes from the request and is joined to the root unchecked;
	// confirm that callers (or ToolPathGuard) prevent "../" traversal out of the workspace.
	$full_file_path = $this->root . '/' . $file_path;
	$code_blob      = is_readable( $full_file_path ) ? file_get_contents( $full_file_path ) : false;
	if ( $code_blob === false ) {
		$this->global_runtime_debug .= "ERROR: Unable to read file: {$file_path}\n";

		return json_encode( NodeResponse::error( 'File reading failed', 500, [ 'message' => "Unable to read {$file_path}" ] ) );
	}
	$this->clipped_source_code = mb_substr( $code_blob, 0, 32_000 );

	$this->global_runtime_debug .= "Workspace root: {$this->root}\n";
	$this->global_runtime_debug .= 'WooCommerce version: ' . ( $this->wc_version ?? 'unknown' ) . "\n";
	$this->global_runtime_debug .= 'Marketplace slug: ' . ( $this->marketplace_slug ?? 'unknown' ) . "\n";
	$this->global_runtime_debug .= 'Tools registered: ' . count( $this->registry->getTools() ) . "\n";
	$this->global_runtime_debug .= 'Static analysis results: ' . count( $this->static_analysis_results ) . "\n\n";

	$this->resetDebugFiles();

	// Execute the four-phase pipeline with telemetry
	$phase_telemetry = [];

	$this->global_runtime_debug .= "\n╔════════════════════════════════════════════════════════════════╗\n";
	$this->global_runtime_debug .= "║ WOOCOMMERCE FOUR-PHASE VULNERABILITY PIPELINE ║\n";
	$this->global_runtime_debug .= "╚════════════════════════════════════════════════════════════════╝\n\n";

	// Phase 1: Generate candidates
	$phase_start                 = microtime( true );
	$candidates                  = $this->runCandidateGeneration( $input );
	$phase_telemetry['generate'] = [
		'duration_ms'      => round( ( microtime( true ) - $phase_start ) * 1000, 2 ),
		'candidates_found' => count( $candidates ),
	];

	// Phase 2: Select one candidate
	$phase_start               = microtime( true );
	$selected                  = $this->runCandidateSelection( $input, $candidates );
	$this->selected_candidate  = $selected;
	$phase_telemetry['select'] = [
		'duration_ms' => round( ( microtime( true ) - $phase_start ) * 1000, 2 ),
		'selected'    => $selected,
	];

	// Phase 3 & 4: Investigation and Verdict
	$phase_start = microtime( true );
	$screening   = $this->runInvestigation( $input, $selected );

	// Split investigate and verdict telemetry
	$phase_telemetry['investigate'] = [
		'duration_ms' => round( ( microtime( true ) - $phase_start ) * 1000, 2 ),
		'tool_calls'  => count( $screening['tool_calls'] ?? [] ),
	];
	$phase_telemetry['verdict']     = [
		'iterations'        => $screening['iterations'] ?? 0,
		'max_turns_reached' => $screening['max_turns_reached'] ?? false,
	];

	// Add phase telemetry to result
	$screening['phase_telemetry'] = $phase_telemetry;

	// Add WooCommerce-specific telemetry (with deduplication)
	$screening['woocommerce_metadata'] = [
		'marketplace_slug'    => $this->marketplace_slug,
		'baseline_wc_version' => $this->wc_version,
		'interfaces_used'     => array_values( array_unique( $this->woo_interfaces ) ), // Ensure unique
	];

	$this->global_runtime_debug .= "\n【FINAL RESULT】\n";
	$this->global_runtime_debug .= json_encode( $screening, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES ) . "\n";
	$this->global_runtime_debug .= "【/FINAL RESULT】\n\n";

	$this->saveDebugFiles();

	return json_encode(
		NodeResponse::toolPrompt(
			json_encode( $screening, JSON_UNESCAPED_SLASHES ),
			[],
			$model,
			[ 'iterations' => 0 ]
		)
	);
}

/**
 * Detect the WooCommerce version shipped in the workspace.
 *
 * Probes files under `<root>/__DEP_[woocommerce]__` in order of preference and
 * stores the first match in $this->wc_version as "WooCommerce <version>".
 * Missing or unreadable files are skipped; when nothing matches the property
 * is left untouched.
 */
private function detectWooCommerceVersion(): void {
	$wc_dir = $this->root . '/__DEP_[woocommerce]__';

	// Relative file => patterns to try in order; capture group 1 is the version.
	$probes = [
		// Main plugin file: "Version:" header first, then the WC_VERSION constant.
		'/woocommerce.php'                => [
			'/\* Version:\s*([0-9.]+(?:-\w+)?)/',
			'/define\s*\(\s*[\'"]WC_VERSION[\'"]\s*,\s*[\'"]([0-9.]+(?:-\w+)?)[\'"]/',
		],
		// Fallback: readme.txt "Stable tag".
		'/readme.txt'                     => [
			'/Stable tag:\s*([0-9.]+)/',
		],
		// Fallback: the $version property of the main class.
		'/includes/class-woocommerce.php' => [
			'/public\s+\$version\s*=\s*[\'"]([0-9.]+(?:-\w+)?)[\'"]/',
		],
	];

	foreach ( $probes as $relative_file => $patterns ) {
		$probe_file = $wc_dir . $relative_file;
		if ( ! file_exists( $probe_file ) ) {
			continue;
		}

		$content = file_get_contents( $probe_file );
		if ( $content === false ) {
			// Exists but cannot be read: fall through to the next probe.
			continue;
		}

		foreach ( $patterns as $pattern ) {
			if ( preg_match( $pattern, $content, $matches ) ) {
				$this->wc_version = 'WooCommerce ' . $matches[1];

				return;
			}
		}
	}
}
+EOD; + + // Define response format for structured output + $candidates_schema = [ + 'type' => 'json_schema', + 'json_schema' => [ + 'name' => 'vulnerability_candidates', + 'strict' => true, + 'schema' => [ + 'type' => 'object', + 'properties' => [ + 'candidates' => [ + 'type' => 'array', + 'items' => [ + 'type' => 'object', + 'properties' => [ + 'line' => [ + 'type' => 'integer', + 'description' => 'Line number where the suspicious code starts', + ], + 'rationale' => [ + 'type' => 'string', + 'description' => 'Brief explanation of why this location is suspicious', + ], + ], + 'required' => [ 'line', 'rationale' ], + 'additionalProperties' => false, + ], + 'minItems' => 3, + 'maxItems' => 3, + ], + ], + 'required' => [ 'candidates' ], + 'additionalProperties' => false, + ], + ], + ]; + + // Store and restore model options + $previous_tool_choice = 'auto'; + $previous_temperature = 0.35; + $previous_response_format = null; + + try { + // $this->chat->setModelOption( 'tool_choice', 'none' ); + if ( strpos( \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(), 'o4-mini' ) === false ) { + $this->chat->setModelOption( 'temperature', 0.2 ); + } else { + $previous_temperature = null; + } + $this->chat->setModelOption( 'response_format', $candidates_schema ); + + $response = $this->chat->generateChat( [ Message::user( $prompt ) ] ); + } finally { + // $this->chat->setModelOption( 'tool_choice', $previous_tool_choice ); + if ( strpos( \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(), 'o4-mini' ) === false ) { + $this->chat->setModelOption( 'temperature', $previous_temperature ); + } + if ( $previous_response_format !== null ) { + $this->chat->setModelOption( 'response_format', $previous_response_format ); + } + } + + $this->global_runtime_debug .= "Raw response:\n{$response}\n"; + + // Parse guaranteed valid JSON response + $candidates = []; + $parsed = json_decode( $response, true ); + + if ( $parsed && isset( $parsed['candidates'] ) && is_array( $parsed['candidates'] ) ) 
{ + foreach ( $parsed['candidates'] as $idx => $candidate ) { + $candidates[] = [ + 'file' => $file_path, + 'line' => (int) $candidate['line'], + 'rationale' => $candidate['rationale'], + ]; + } + } + + // This should never happen with structured output, but just in case + if ( empty( $candidates ) ) { + $this->global_runtime_debug .= "ERROR: No candidates returned despite structured output\n"; + $candidates = [ + [ + 'file' => $file_path, + 'line' => 100, + 'rationale' => 'Fallback candidate 1', + ], + [ + 'file' => $file_path, + 'line' => 200, + 'rationale' => 'Fallback candidate 2', + ], + [ + 'file' => $file_path, + 'line' => 300, + 'rationale' => 'Fallback candidate 3', + ], + ]; + } + + $candidates = array_slice( $candidates, 0, 3 ); + + $this->global_runtime_debug .= 'Generated ' . count( $candidates ) . " candidates\n"; + foreach ( $candidates as $idx => $cand ) { + $this->global_runtime_debug .= " [{$idx}] Line {$cand['line']}: {$cand['rationale']}\n"; + } + $this->global_runtime_debug .= "└─── PHASE 1 COMPLETE ───┘\n\n"; + + return $candidates; + } + + /** + * PHASE 2: Select one candidate - WooCommerce economics focused + * + * @param array $input + * @param array $candidates + * @return array + */ + private function runCandidateSelection( array $input, array $candidates ): array { + $this->global_runtime_debug .= "\n┌─── PHASE 2: WOOCOMMERCE CANDIDATE SELECTION ───┐\n"; + + if ( empty( $candidates ) ) { + return [ + 'file' => $input['files'][0]['path'] ?? 'unknown', + 'line' => 1, + 'rationale' => 'Full file analysis - no specific WooCommerce integration points identified', + ]; + } + + if ( count( $candidates ) === 1 ) { + $selected = $candidates[0]; + $this->global_runtime_debug .= "Only one candidate, auto-selected\n"; + } else { + $candidate_list = ''; + foreach ( $candidates as $idx => $cand ) { + $candidate_list .= ( $idx + 1 ) . ". 
Line {$cand['line']}: {$cand['rationale']}\n"; + } + + $prompt = << 'json_schema', + 'json_schema' => [ + 'name' => 'candidate_selection', + 'strict' => true, + 'schema' => [ + 'type' => 'object', + 'properties' => [ + 'choice' => [ + 'type' => 'integer', + 'enum' => [ 1, 2, 3 ], + 'description' => 'The number (1, 2, or 3) of the selected candidate', + ], + 'reasoning' => [ + 'type' => 'string', + 'description' => 'Brief explanation of why this candidate was selected', + ], + ], + 'required' => [ 'choice', 'reasoning' ], + 'additionalProperties' => false, + ], + ], + ]; + + $previous_tool_choice = 'auto'; + $previous_temperature = 0.35; + $previous_response_format = null; + + try { + if ( strpos( \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(), 'o4-mini' ) === false ) { + $this->chat->setModelOption( 'tool_choice', 'none' ); + $this->chat->setModelOption( 'temperature', 0.1 ); + } + $this->chat->setModelOption( 'response_format', $selection_schema ); + + $response = $this->chat->generateChat( [ Message::user( $prompt ) ] ); + } finally { + if ( strpos( \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(), 'o4-mini' ) === false ) { + $this->chat->setModelOption( 'tool_choice', $previous_tool_choice ); + $this->chat->setModelOption( 'temperature', $previous_temperature ); + } + if ( $previous_response_format !== null ) { + $this->chat->setModelOption( 'response_format', $previous_response_format ); + } + } + + $this->global_runtime_debug .= "Selection response: {$response}\n"; + + // Parse guaranteed valid JSON + $parsed = json_decode( $response, true ); + $selected_idx = null; + + if ( $parsed && isset( $parsed['choice'] ) ) { + $selected_idx = (int) $parsed['choice'] - 1; + if ( isset( $parsed['reasoning'] ) ) { + $this->global_runtime_debug .= "Selection reasoning: {$parsed['reasoning']}\n"; + } + } + + if ( $selected_idx !== null && isset( $candidates[ $selected_idx ] ) ) { + $selected = $candidates[ $selected_idx ]; + $this->global_runtime_debug .= 'Selected 
candidate ' . ( $selected_idx + 1 ) . "\n"; + } else { + $selected = $candidates[0]; + $this->global_runtime_debug .= "Invalid selection, using first candidate\n"; + } + } + + $this->global_runtime_debug .= "Selected: {$selected['file']} line {$selected['line']}\n"; + $this->global_runtime_debug .= "Rationale: {$selected['rationale']}\n"; + $this->global_runtime_debug .= "└─── PHASE 2 COMPLETE ───┘\n\n"; + + return $selected; + } + + /** + * PHASE 3 & 4: Investigation with tools + final verdict + * + * @param array $input + * @param array $selected + * @return array + */ + private function runInvestigation( array $input, array $selected ): array { + $this->global_runtime_debug .= "\n┌─── PHASE 3 & 4: WOOCOMMERCE INVESTIGATION & VERDICT ───┐\n"; + $this->global_runtime_debug .= "Investigating: {$selected['file']} line {$selected['line']}\n\n"; + + return $this->runScreeningPass( $input, \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(), $selected ); + } + + /** + * Modified screening pass for WooCommerce focus + * + * @param array $input + * @param string $model + * @param array $selected + * @return array + */ + private function runScreeningPass( array $input, string $model, array $selected ): array { + $files = $input['files']; + $plugin_meta = $input['plugin_meta']; + $file_path = $files[0]['path']; + + $static_context = $this->buildStaticAnalysisContext(); + + $user_msg_src = <<marketplace_slug ) ) { + $user_msg_src .= "\nMarketplace slug: {$this->marketplace_slug}"; + } + + $user_msg_src .= <<wc_version} + +```php +{$this->clipped_source_code} +``` +{$static_context} +TXT; + + $this->chat->setModelOption( 'tool_choice', 'auto' ); + if ( strpos( \QIT_AI_Webserver\Lib\LLPhantBootstrap::getModel(), 'o4-mini' ) === false ) { + $this->chat->setModelOption( 'temperature', 0.35 ); + } + + // Register tools + foreach ( $this->registry->getTools() as $t ) { + $this->chat->addTool( $t->getFunctionInfo() ); + } + + // Modified system message for WooCommerce + 
$this->chat->setSystemMessage( $this->getWooCommerceSystemPrompt( $files, $plugin_meta, $selected ) ); + + // Phase 2 reasoning (focused on WooCommerce impact) + $phase2_prompt = $user_msg_src . "\n\n**IMPORTANT: Focus your analysis on the selected location: {$selected['file']} around line {$selected['line']}. Reason: {$selected['rationale']}**\n\nAnalyze how this code interacts with WooCommerce and its potential impact on store economics. Provide your reasoning ONLY. DO NOT call any tools in this response."; + + $this->chat->setModelOption( 'tool_choice', 'none' ); + $reasoning = $this->chat->generateChatOrReturnFunctionCalled( + [ Message::user( $phase2_prompt ) ] + ); + $this->chat->setModelOption( 'tool_choice', 'auto' ); + + $this->global_runtime_debug .= "\n【PHASE 2 REASONING】\n"; + $this->global_runtime_debug .= "Model: {$model}\n"; + $this->global_runtime_debug .= "Reasoning:\n" . $reasoning . "\n"; + $this->global_runtime_debug .= "【/PHASE 2 REASONING】\n\n"; + + if ( is_array( $reasoning ) ) { + return [ + 'finding_id' => null, + 'status' => 'none', + 'error' => 'Phase-2 model attempted tool use despite instructions', + ]; + } + + $reasoning = trim( $reasoning ); + if ( $reasoning === '' ) { + return [ + 'finding_id' => null, + 'status' => 'none', + 'error' => 'Phase-2 model produced empty reasoning', + ]; + } + + $conv = [ + Message::user( $user_msg_src ), + Message::assistant( $reasoning ), + ]; + + // Add WooCommerce-focused investigation message + $conv[] = Message::user( + "Good reasoning. You are now in PHASE 3.\n\n" . + "🛠 You may read files under:\n" . + " – __SUT_DIR__/ (the extension)\n" . + " – __DEP_[woocommerce]__/ (WooCommerce core)\n" . + " – __WP_ROOT__/wp-includes/, wp-admin/ (WordPress core)\n" . + "but stay close to the selected file first.\n" . + "Use WooCommerce file reads only to clarify how the extension hooks into Woo internals.\n\n" . + "Helpful tool tactics:\n" . 
+ "• search_strings([\"add_action\",\"add_filter\"], \"__SUT_DIR__/\") – map Woo hooks\n" . + "• find_hooks(\"__SUT_DIR__/\") followed by read_file() – inspect callback bodies\n" . + "• parse_php() on the selected class/method – confirm control/data flow\n" . + "• read_file(\"__DEP_[woocommerce]__/includes/class-wc-order.php\", ...) – understand API guarantees\n\n" . + "Start your investigation at {$selected['file']} ≈ line {$selected['line']}, but use tools to trace the code flow and find the actual vulnerability wherever it may be in this file.\n" . + 'You have a maximum of ' . self::INSPECTION_BUDGET . " tool calls to gather evidence.\n" . + 'Call inspection tools as needed, then when ready, provide your final JSON assessment.' + ); + + // Continue with modified tool loop + $screening_result = $this->screeningToolLoop( + $this->chat, + $conv, + $input, + $selected + ); + + $screening_result['result'] = $this->enrichResultWithContext( $screening_result['result'] ); + + return $screening_result; + } + + /** + * Modified screening tool loop with WooCommerce scope + * + * @param \LLPhant\Chat\ChatInterface $chat Chat interface. + * @param array $conv Conversation array. + * @param array $input Input parameters. + * @param array $selected Selected candidate. + * @return array Screening results. + */ + private function screeningToolLoop( + \LLPhant\Chat\ChatInterface $chat, + array $conv, + array $input, + array $selected + ): array { + $screening_debug = "\n== WOOCOMMERCE SCREENING TOOL LOOP STARTED ==\n"; + $screening_debug .= 'Time: ' . gmdate( 'Y-m-d H:i:s' ) . 
"\n"; + $screening_debug .= "Selected candidate: {$selected['file']} line {$selected['line']}\n\n"; + + $files = $input['files']; + $plugin_meta = $input['plugin_meta']; + $response_format = $input['response_format']; + + // State machine vars + $phase = self::PHASE_INVESTIGATE; + $max_turns = self::MAX_TURNS; + $invalid_json_streak = 0; + $empty_response_count = 0; + $all_tool_calls = []; + $screening_turn_log = []; + $duplicate_tool_usage = []; // Track duplicate tool usage + + $retrieval_made = false; + $inspection_made = false; + $unique_inspection_calls = []; + $inspection_turns = 0; + $tree_directory_count = 0; // Track tree_directory calls + + $screening_debug .= "\n╔════════════════════════════════════════════════════════════════╗\n"; + $screening_debug .= "║ STARTING TURN LOOP ║\n"; + $screening_debug .= "╚════════════════════════════════════════════════════════════════╝\n"; + $screening_debug .= "Max turns: {$max_turns}\n"; + $screening_debug .= "Initial phase: {$phase}\n"; + $screening_debug .= 'Inspection budget: ' . self::INSPECTION_BUDGET . "\n\n"; + + // Main loop + for ( $turn = 0; $turn < $max_turns; $turn++ ) { + $screening_debug .= "\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"; + $screening_debug .= "TURN {$turn} / {$max_turns}\n"; + $screening_debug .= "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"; + + $screening_debug .= "【CONVERSATION STATE】\n"; + $screening_debug .= "- Current phase: {$phase}\n"; + $screening_debug .= '- Messages in conv: ' . count( $conv ) . "\n"; + $screening_debug .= '- Tool calls so far: ' . count( $all_tool_calls ) . "\n"; + $screening_debug .= '- Unique inspection calls: ' . count( $unique_inspection_calls ) . '/' . self::INSPECTION_BUDGET . "\n"; + $screening_debug .= "【/CONVERSATION STATE】\n\n"; + + // Create debug directory if it doesn't exist + $debug_dir = rtrim( sys_get_temp_dir(), '/\\' ) . '/qit-node/debug'; + if ( ! 
is_dir( $debug_dir ) ) { + mkdir( $debug_dir, 0700, true ); + } + file_put_contents( $debug_dir . '/runtime_debug_turn.txt', $screening_debug ); + + $screening_debug .= ">>> Calling generateChatOrReturnFunctionCalled()...\n"; + $start_time = microtime( true ); + + try { + $answer = $chat->generateChatOrReturnFunctionCalled( $conv ); + } catch ( \Exception $e ) { + $screening_debug .= '❌ LLM call failed: ' . $e->getMessage() . "\n"; + $this->global_runtime_debug .= $screening_debug; + + return [ + 'result' => [ + 'finding_id' => null, + 'status' => 'error', + 'error' => 'LLM call failed: ' . $e->getMessage(), + ], + 'tool_calls' => $all_tool_calls, + 'iterations' => $turn + 1, + 'turn_log' => $screening_turn_log, + ]; + } + + $end_time = microtime( true ); + $call_duration = round( ( $end_time - $start_time ) * 1000, 2 ); + $screening_debug .= "<<< Response received in {$call_duration}ms\n\n"; + + // Process string response + if ( is_string( $answer ) ) { + $screening_debug .= "【PROCESSING STRING RESPONSE】\n"; + + // Check for excessive empty responses early + if ( trim( $answer ) === '' ) { + ++$empty_response_count; + $screening_debug .= "⚠️ Empty response detected (count: {$empty_response_count})\n"; + + if ( $empty_response_count >= 2 ) { + $this->global_runtime_debug .= $screening_debug; + + return [ + 'finding_id' => null, + 'status' => 'not_vulnerable', + 'error' => 'Unable to analyze - empty responses', + 'note' => 'The scanner was unable to properly analyze this file', + ]; + } + + $conv[] = Message::assistant( 'I returned an empty answer. Let me rethink and answer properly.' ); + $conv[] = Message::user( + $phase === self::PHASE_VERDICT + ? 'Please return **only** the JSON object that matches the schema now. NO TOOLS.' + : "Please either call a specific tool to investigate {$selected['file']} or output the final JSON verdict." + ); + continue; + } + + $empty_response_count = 0; + $screening_debug .= 'Response content (' . strlen( $answer ) . 
" chars)\n"; + + // Try to extract JSON + $parsed = $this->tryExtractJson( $answer ); + + if ( $parsed && array_key_exists( 'finding_id', $parsed ) && isset( $parsed['status'] ) ) { + // Validate it's related to selected candidate + if ( ! $this->validateVerdictScope( $parsed, $selected ) ) { + $screening_debug .= "⚠️ Verdict is outside selected scope\n"; + $conv[] = Message::user( + "Your verdict must relate to the selected location: {$selected['file']} around line {$selected['line']}. " . + "Either report a vulnerability at that location or return 'not_vulnerable' status." + ); + continue; + } + + // Check if it's a known static issue + if ( $parsed['status'] === 'vulnerable' && ! $this->isNewVulnerability( $parsed ) ) { + $screening_debug .= "⚠️ Filtered out: Known static analysis issue\n"; + $conv[] = Message::user( + 'This appears to be a basic input validation issue already found by static analysis. ' . + "Please focus on logical vulnerabilities that affect WooCommerce store economics or provide a 'not_vulnerable' verdict." + ); + continue; + } + + $screening_debug .= "✅ Valid JSON extracted!\n"; + $this->global_runtime_debug .= $screening_debug; + + return [ + 'result' => $parsed, + 'tool_calls' => $all_tool_calls, + 'iterations' => $turn + 1, + 'turn_log' => $screening_turn_log, + ]; + } + + // Check for loops + if ( $this->checkConversationLoop( $answer ) ) { + $this->global_runtime_debug .= $screening_debug; + + return [ + 'result' => [ + 'finding_id' => null, + 'status' => 'none', + 'error' => 'loop detected', + ], + 'tool_calls' => $all_tool_calls, + 'iterations' => $turn + 1, + 'turn_log' => $screening_turn_log, + ]; + } + + // Handle consecutive thinking with better prompting + if ( $phase === self::PHASE_INVESTIGATE && ! 
empty( $unique_inspection_calls ) ) { + ++$inspection_turns; + $screening_debug .= "Consecutive thinking turn: {$inspection_turns}\n"; + + // Give more guidance after first thinking turn + if ( $inspection_turns === 1 ) { + $conv[] = Message::user( + "Good analysis. Now use specific tools to investigate {$selected['file']}:\n" . + "• read_file to examine the process_refund method (lines 1050-1080)\n" . + "• search_strings to find related validation code\n" . + "• parse_php to understand the code structure\n" . + 'Or provide your final JSON verdict if you have enough evidence.' + ); + continue; + } + + if ( $inspection_turns >= 2 && $phase !== self::PHASE_VERDICT ) { + $phase = self::PHASE_VERDICT; + $chat->setModelOption( 'response_format', $response_format ); + $conv[] = Message::user( + "You have gathered sufficient evidence. You are now in PHASE 4.\n" . + "• If vulnerable, clearly state how the flaw impacts WooCommerce store integrity\n" . + " (e.g. \"attacker can reduce order total\", \"refund endpoint unauthenticated\").\n" . + "• Link evidence lines from BOTH the extension and WooCommerce core (if consulted)\n" . + " in vulnerability_chain or related_static_findings.\n" . + "Return ONLY the JSON verdict about {$selected['file']} line {$selected['line']}. NO TOOLS." + ); + continue; + } + } + + $conv[] = Message::assistant( $answer ); + $conv[] = Message::user( + $phase === self::PHASE_VERDICT + ? 'Return **only** the JSON object for your WooCommerce vulnerability assessment. NO TOOLS.' + : "Continue investigating {$selected['file']} or provide your final JSON verdict." + ); + continue; + } + + // Process tool calls + if ( is_array( $answer ) ) { + $screening_debug .= "【PROCESSING TOOL CALLS】\n"; + + if ( $phase === self::PHASE_VERDICT ) { + $conv[] = Message::user( + 'You are in PHASE 4. Tool calls are not allowed. Please provide the final JSON assessment only.' 
+ ); + continue; + } + + $validated_calls = []; + foreach ( $answer as $tool_call ) { + $validated_calls[] = $tool_call; + } + + // Process valid tool calls + $invalid_json_streak = 0; + $inspection_turns = 0; + + $refs = [ + 'all' => &$all_tool_calls, + 'ok' => 0, + 'evidence' => [], + 'step_idx' => - 1, + ]; + $calls = array_map( + // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase -- External library property + fn( $fi ) => [ $fi->name, json_decode( $fi->jsonArgs ?: '{}', true ) ], + $validated_calls + ); + + $this->runToolBatch( $chat, $conv, $calls, $refs, $screening_debug ); + + // Track unique calls and prevent excessive duplicates + $new_unique_calls_this_turn = 0; + $duplicate_calls_this_turn = []; + + foreach ( $calls as [$tool, $args] ) { + if ( ! in_array( $tool, self::RETRIEVAL_TOOLS, true ) ) { + $hash = $tool . ':' . md5( json_encode( $args ) ); + + // Track duplicate tool usage + if ( ! isset( $duplicate_tool_usage[ $hash ] ) ) { + $duplicate_tool_usage[ $hash ] = 1; + } else { + ++$duplicate_tool_usage[ $hash ]; + $duplicate_calls_this_turn[ $hash ] = [ + 'tool' => $tool, + 'count' => $duplicate_tool_usage[ $hash ], + ]; + } + + if ( ! isset( $unique_inspection_calls[ $hash ] ) ) { + $unique_inspection_calls[ $hash ] = true; + ++$new_unique_calls_this_turn; + } + } + } + + // If duplicate calls were made, provide nagging feedback + if ( ! 
empty( $duplicate_calls_this_turn ) ) { + $screening_debug .= "⚠️ Duplicate tool calls detected\n"; + $nag_message = "I notice you're using the same tools repeatedly:\n"; + + foreach ( $duplicate_calls_this_turn as $hash => $info ) { + $nag_message .= "• Tool `{$info['tool']}` has been called {$info['count']} times with the same parameters\n"; + } + + $nag_message .= "\nConsider trying different approaches or parameters to gather new information."; + $conv[] = Message::user( $nag_message ); + } + + // If no new unique calls were made, prompt for different approach + if ( $new_unique_calls_this_turn === 0 && count( $unique_inspection_calls ) > 0 ) { + $screening_debug .= "⚠️ No new unique tool calls made\n"; + $conv[] = Message::user( + "You're repeating the same tool calls. Try a different approach:\n" . + "• Use read_file with specific line ranges to examine the vulnerability\n" . + "• Use search_strings with different keywords\n" . + '• Or provide your final JSON verdict based on current evidence.' + ); + continue; + } + + // Check budget + if ( count( $unique_inspection_calls ) >= self::INSPECTION_BUDGET ) { + $phase = self::PHASE_VERDICT; + $chat->setModelOption( 'response_format', $response_format ); + $conv[] = Message::user( + 'You have reached the inspection budget. Provide your final JSON verdict now focusing on WooCommerce impact. NO MORE TOOLS.' 
+ ); + } + + continue; + } + + // Fallback + $conv[] = Message::assistant( '[unrecognised output]' ); + } + + $this->global_runtime_debug .= $screening_debug; + + return [ + 'result' => [ + 'finding_id' => null, + 'status' => 'none', + 'error' => 'timeout', + ], + 'tool_calls' => $all_tool_calls, + 'iterations' => $turn + 1, + 'turn_log' => $screening_turn_log, + 'max_turns_reached' => true, + ]; + } + + /** + * WooCommerce-specific system prompt + * + * @param array $files + * @param array $plugin_meta + * @param array $selected + */ + private function getWooCommerceSystemPrompt( array $files, array $plugin_meta, array $selected = [] ): string { + $static_analysis_section = ''; + $known_issue_types = []; + + if ( ! empty( $this->static_analysis_results ) ) { + $static_analysis_section = "\n\n**STATIC ANALYSIS BASELINE**\n"; + $static_analysis_section .= "The following issues were already found by static analysis tools:\n\n"; + + $grouped_issues = []; + foreach ( $this->static_analysis_results as $result ) { + $message = $result['message'] ?? 'Unknown'; + $line = $result['line'] ?? 'Unknown'; + $severity = $result['ai_category'] ?? ( $result['severity'] ?? 'Unknown' ); + + $message_key = $this->normalizeStaticMessage( $message ); + if ( ! isset( $grouped_issues[ $message_key ] ) ) { + $grouped_issues[ $message_key ] = [ + 'message' => $message, + 'lines' => [], + 'severity' => $severity, + ]; + } + $grouped_issues[ $message_key ]['lines'][] = $line; + + if ( stripos( $message, 'nonce' ) !== false ) { + $known_issue_types['nonce'] = true; + } + if ( preg_match( '/sanitize|parse_str/i', $message ) ) { + $known_issue_types['sanitization'] = true; + } + } + + foreach ( $grouped_issues as $issue ) { + $lines = array_unique( $issue['lines'] ); + sort( $lines ); + $static_analysis_section .= "• {$issue['message']} (Lines: " . implode( ', ', $lines ) . 
", Severity: {$issue['severity']})\n"; + } + + $static_analysis_section .= "\n**FILTERING GUIDANCE:**\n"; + $static_analysis_section .= "• These issues are ALREADY KNOWN - do not report them individually\n"; + $static_analysis_section .= "• Only mention them if they enable a MORE SEVERE vulnerability when combined with other issues\n"; + $static_analysis_section .= "• Focus on vulnerabilities that static analysis CANNOT detect\n"; + } + + $available_tools = ''; + $placeholder_examples = ''; + + // Check if available_tools exists + if ( $this->available_tools !== null && ! empty( $this->available_tools ) ) { + $available_tools = "\n\n**Available tools**:\n"; + foreach ( $this->available_tools as $tool ) { + if ( ! empty( $tool['description'] ) ) { + $available_tools .= "• {$tool['name']} ({$tool['description']})\n"; + } else { + $available_tools .= "• {$tool['name']}\n"; + } + } + } + + // WooCommerce-specific placeholders + $placeholder_examples = "\n\n**PATH PLACEHOLDERS FOR TOOLS**:\n"; + $placeholder_examples .= "When calling tools, use these placeholders at the START of paths:\n"; + $placeholder_examples .= "• __WP_ROOT__ - WordPress root directory\n"; + $placeholder_examples .= "• __SUT_DIR__ - The WooCommerce extension being analyzed\n"; + $placeholder_examples .= "• __DEP_[woocommerce]__ - WooCommerce plugin directory\n\n"; + + $placeholder_examples .= "**EXAMPLES**:\n"; + $placeholder_examples .= "• read_file(\"__SUT_DIR__/includes/class-payment-gateway.php\")\n"; + $placeholder_examples .= "• search_strings([\"woocommerce_\"], \"__SUT_DIR__/\")\n"; + $placeholder_examples .= "• read_file(\"__DEP_[woocommerce]__/includes/class-wc-order.php\")\n"; + $placeholder_examples .= "\n**IMPORTANT**: Placeholders must be at the START of the path!\n"; + + // Add focus directive if we have a selected candidate + $focus_directive = ''; + if ( ! 
empty( $selected ) ) { + $focus_directive = "\n\n**FOCUS DIRECTIVE**\n"; + $focus_directive .= "Start your investigation at this location, but follow the code flow:\n"; + $focus_directive .= "• File: {$selected['file']}\n"; + $focus_directive .= "• Line: ≈ {$selected['line']}\n"; + $focus_directive .= "• Reason: {$selected['rationale']}\n"; + $focus_directive .= "\nYou must ultimately return at most ONE vulnerability related to this location.\n"; + $focus_directive .= "Any additional suspicious locations should be ignored in this conversation.\n"; + } + + // Store budget value for HEREDOC + $budget = self::INSPECTION_BUDGET; + + // Build the context section + $context_section = <<marketplace_slug ) ) { + $context_section .= "\n- Marketplace slug: {$this->marketplace_slug} (infer integration patterns from this)"; + } + + $context_section .= "\n- " . $this->wc_version; + + return << $static_results + * @return array + */ + private function normalizeStatic( array $static_results ): array { + return json_decode( json_encode( $static_results ), true ); + } + + private function isKnownStaticIssue( string $file, int $line, string $issue_type = '' ): bool { + foreach ( $this->static_analysis_results as $static_result ) { + $static_file = $static_result['file'] ?? ''; + $static_line = $static_result['line'] ?? 0; + + if ( $static_file === $file && + abs( $static_line - $line ) <= self::LINE_FUZZ ) { + return true; + } + + if ( $static_file === $file && + ! empty( $issue_type ) && + stripos( $static_result['message'] ?? '', $issue_type ) !== false ) { + return true; + } + } + + return false; + } + + /** + * @return array + */ + private function getStaticAnalysisSummary(): array { + $summary = [ + 'nonce_issues' => [], + 'sanitization_issues' => [], + 'other_issues' => [], + ]; + + foreach ( $this->static_analysis_results as $idx => $result ) { + $line = $result['line'] ?? 'unknown'; + $message = $result['message'] ?? 
/**
 * Fingerprint a static-analysis message, ignoring variable names and line numbers.
 *
 * Superglobal-style accesses such as $_POST['key'] are collapsed to "$VAR" and
 * "line <digits>" to "line X" before hashing.
 *
 * @param string $message Static-analysis message.
 * @return string MD5 hash of the normalised message.
 */
private function normalizeStaticMessage( string $message ): string {
	// Applied in order, exactly like two consecutive preg_replace() calls.
	$rewrites = [
		'/\$\w+\[\'[\w-]+\'\]/' => '$VAR',
		'/line \d+/'            => 'line X',
	];

	return md5( preg_replace( array_keys( $rewrites ), array_values( $rewrites ), $message ) );
}
/**
 * Delete debug artefacts left over from a previous scan.
 *
 * saveDebugFiles() writes its three files under "<system temp>/qit-node/debug",
 * so that directory is cleaned here. The legacy copies directly under /tmp are
 * removed as well, as before.
 *
 * @return void
 */
private function resetDebugFiles(): void {
	$file_names = [
		'runtime_debug_turn.txt',
		'vulnerability_scan_context.json',
		'vulnerability_scan_complete_debug.txt',
	];

	$directories = array_unique( [
		'/tmp',
		rtrim( sys_get_temp_dir(), '/\\' ) . '/qit-node/debug',
	] );

	foreach ( $directories as $directory ) {
		foreach ( $file_names as $file_name ) {
			$file = $directory . '/' . $file_name;

			// Only log a reset that actually happened.
			if ( is_file( $file ) && unlink( $file ) ) {
				$this->global_runtime_debug .= "Reset debug file: {$file}\n";
			}
		}
	}
}
/**
 * Report whether the assistant repeated its previous message.
 *
 * Messages are compared after trimming. When the message is not a repeat it is
 * remembered as the new "previous" message.
 *
 * @param string $current_message Latest assistant message.
 * @return bool True when it matches the (non-empty) previous message.
 */
private function checkConversationLoop( string $current_message ): bool {
	$previous_message = $this->last_assistant_message;

	$digests   = [
		sha1( trim( $current_message ) ),
		sha1( trim( $previous_message ) ),
	];
	$is_repeat = $digests[0] === $digests[1] && ! empty( $previous_message );

	if ( ! $is_repeat ) {
		$this->last_assistant_message = $current_message;
	}

	return $is_repeat;
}
/**
 * Extract the first decodable JSON object/array from free-form text.
 *
 * The whole text is tried first. Otherwise every balanced top-level "{...}"
 * group is tried in order of appearance. If the text is longer than 10 KB the
 * first 10 KB are searched as a fallback, which helps when the recursive
 * pattern fails on the full string.
 *
 * @param string $text Text to extract JSON from.
 * @return array|null Decoded JSON as an array, or null when nothing usable is found.
 */
private function tryExtractJson( string $text ): ?array {
	// Scalars such as "42" or "true" are valid JSON but not a usable result,
	// and returning them would violate the ?array return type.
	$decoded = json_decode( $text, true );
	if ( is_array( $decoded ) ) {
		return $decoded;
	}

	$balanced_braces = '/\{(?:[^{}]|(?R))*\}/s';

	$haystacks = [ $text ];
	if ( strlen( $text ) > 10240 ) {
		$haystacks[] = substr( $text, 0, 10240 );
	}

	foreach ( $haystacks as $haystack ) {
		// preg_match_all() yields 0 for "no match" and false on a PCRE error;
		// either way there is nothing to decode in this haystack.
		if ( ! preg_match_all( $balanced_braces, $haystack, $matches ) ) {
			continue;
		}

		// A prose fragment like "{placeholder}" may precede the real payload,
		// so keep going until one group decodes.
		foreach ( $matches[0] as $candidate ) {
			$decoded = json_decode( $candidate, true );
			if ( is_array( $decoded ) ) {
				return $decoded;
			}
		}
	}

	return null;
}
md5( json_encode( $canonical ) ); // Tool name + canonical args hash + + if ( $empty_args ) { + $args = new \stdClass(); + } + + $tool_id = uniqid(); + $conv[] = Message::assistantAskingTools( [ new ToolCall( $tool_id, $tool, json_encode( $args ) ) ] ); + + if ( isset( $cache[ $sig ] ) ) { + $orig = $cache[ $sig ]; + $out = $orig['out'] + [ 'duplicate_of' => $orig['first_id'] ]; + $runtime_debug .= "💡 Duplicate detected – re-using result of tool ID {$orig['first_id']}\n"; + + $conv[] = Message::toolResult( json_encode( $out ), $tool_id ); + + $refs['all'][] = [ $tool, $args ]; + continue; + } + + $tools = $this->registry->getTools(); + if ( ! isset( $tools[ $tool ] ) ) { + $runtime_debug .= "Tool {$tool} not found in registry.\n"; + $out = [ + 'success' => false, + 'error' => "Unknown tool: {$tool}", + 'data' => null, + ]; + + $cache[ $sig ] = [ + 'out' => $out, + 'first_id' => $tool_id, + ]; + + $conv[] = Message::toolResult( json_encode( $out ), $tool_id ); + + $refs['all'][] = [ $tool, $args ]; + continue; + } + + if ( $empty_args ) { + $problems = $this->validateToolCall( $tool, [], $this->registry ); + } else { + $problems = $this->validateToolCall( $tool, $args, $this->registry ); + } + + if ( $problems ) { + $out = [ + 'success' => false, + 'error' => "Invalid tool call for {$tool}: " . implode( ' ', $problems ), + 'data' => null, + ]; + $runtime_debug .= 'Invalid tool call: ' . implode( ' ', $problems ) . "\n"; + } else { + // Wrap tool execution in try-catch + try { + $runtime_debug .= "Executing {$tool} …\n"; + if ( $empty_args ) { + $out = $this->registry->getTool( $tool )->execute( [] ); + } else { + $out = $this->registry->getTool( $tool )->execute( $args ); + } + } catch ( \Throwable $e ) { + $runtime_debug .= '❌ Tool execution failed: ' . $e->getMessage() . "\n"; + $out = [ + 'success' => false, + 'error' => 'Tool execution failed: ' . 
$e->getMessage(), + 'data' => null, + ]; + } + } + + $cache[ $sig ] = [ + 'out' => $out, + 'first_id' => $tool_id, + ]; + + $runtime_debug .= 'Result: '; + if ( isset( $out['success'] ) && $out['success'] ) { + $runtime_debug .= 'SUCCESS'; + if ( isset( $out['data']['results'] ) ) { + $count = count( $out['data']['results'] ); + $runtime_debug .= " - {$count} results found"; + if ( $count > 0 && $count <= 3 ) { + foreach ( $out['data']['results'] as $idx => $result ) { + $runtime_debug .= "\n [{$idx}] "; + if ( isset( $result['file'] ) ) { + $runtime_debug .= "{$result['file']}:{$result['line']} - "; + } + if ( isset( $result['snippet'] ) ) { + $runtime_debug .= substr( $result['snippet'], 0, 100 ) . '...'; + } + } + } + } elseif ( isset( $out['data']['content'] ) ) { + $lines = substr_count( $out['data']['content'], "\n" ); + $chars = strlen( $out['data']['content'] ); + $runtime_debug .= " - {$lines} lines, {$chars} chars"; + } + } else { + $runtime_debug .= 'FAILED - ' . ( $out['error'] ?? 'Unknown error' ); + } + $runtime_debug .= "\n【/TOOL CALL " . ( $call_index + 1 ) . "】\n"; + + $conv[] = Message::toolResult( json_encode( $out ), $tool_id ); + + $refs['all'][] = [ $tool, $args ]; + if ( ( $out['success'] ?? false ) ) { + ++$refs['ok']; + if ( in_array( $tool, self::EVIDENCE_TOOLS, true ) && ! $empty_args ) { + $refs['evidence'][] = [ + 'file' => $args['file'] ?? ( $args['directory_or_file'] ?? '' ), + 'range' => ( $args['start_line'] ?? 1 ) . '-' . ( $args['end_line'] ?? 1 ), + 'snippet' => $this->firstSnippet( $out['data'] ), + ]; + } + } + } + + // Make WooCommerce interfaces unique + $this->woo_interfaces = array_unique( $this->woo_interfaces ); + } + + private function validateToolCall( string $tool, array $args, ToolRegistry $registry ): array { + $tool_info = $registry->getTool( $tool ); + if ( ! $tool_info ) { + return [ "Unknown tool: {$tool}." 
/**
 * Produce an order-independent form of a value, for building cache signatures.
 *
 * Associative arrays are sorted by key and lists by value, recursively.
 * Non-array values are returned unchanged.
 *
 * @param mixed $v Value to canonicalise.
 * @return mixed Canonical form of $v.
 */
private function canonicalise( mixed $v ): mixed {
	if ( ! is_array( $v ) ) {
		return $v;
	}

	// Canonicalise children first: sorting the parent on not-yet-sorted children
	// would make the result depend on the children's original element order.
	$v = array_map( fn( $child ) => $this->canonicalise( $child ), $v );

	if ( array_is_list( $v ) ) {
		sort( $v );
	} else {
		ksort( $v );
	}

	return $v;
}
'' ); + $sel_file = preg_replace( '/__(?:WP_ROOT|SUT_DIR|DEP_\[[^\]]+\])__\//i', '', + $selected['file'] ); + + return $vuln_file === $sel_file; + } +} diff --git a/src/src/AI/webserver/Endpoints/ZipExtractionEndpoint.php b/src/src/AI/webserver/Endpoints/ZipExtractionEndpoint.php new file mode 100644 index 000000000..5e7d3df59 --- /dev/null +++ b/src/src/AI/webserver/Endpoints/ZipExtractionEndpoint.php @@ -0,0 +1,752 @@ +log_info( 'Starting ZIP extraction endpoint', [ + 'input_keys' => array_keys( $input ), + ] ); + + NodeResponse::mark( 'parameter_validation' ); + $validation_result = $this->validateParameters( $input ); + if ( $validation_result !== true ) { + // validateParameters returns error response string if validation fails + return $validation_result; + } + + $zip_url = $input['zip_url']; + $session_id = $input['session_id'] ?? md5( uniqid() ); + + NodeResponse::mark( 'path_preparation' ); + $temp_base = sys_get_temp_dir(); + $extract_to = $temp_base . '/qit-code-analysis-' . $session_id; + + NodeResponse::mark( 'security_validation' ); + $security_result = $this->validateExtractionSecurity( $temp_base, $extract_to ); + if ( $security_result !== true ) { + // validateExtractionSecurity returns error response string if validation fails + return $security_result; + } + + try { + NodeResponse::mark( 'extraction_start' ); + return $this->performExtraction( $zip_url, $extract_to, $input ); + } catch ( Exception $e ) { + return $this->handleExtractionError( $e ); + } + } + + /* + ===================================================================== + * Parameter & path validation helpers (unchanged) + * =================================================================== + */ + /** + * Validate required parameters + * + * @param array $input Input parameters. 
/**
 * Ensure the extraction target's parent directory lives inside the temp base.
 *
 * The parent directory is created when missing.
 *
 * @param string $temp_base  Temporary base directory.
 * @param string $extract_to Extraction target directory.
 * @return mixed True when the target is acceptable, otherwise a JSON-encoded error response.
 */
private function validateExtractionSecurity(
	string $temp_base,
	string $extract_to
) {
	$real_temp_base = realpath( $temp_base );
	if ( $real_temp_base === false ) {
		// Note: NodeResponse::error will set the HTTP status code in the JSON response
		// The router.worker.php will handle setting the actual HTTP status code
		return json_encode( NodeResponse::error( 'Failed to resolve temp directory path', 500 ) );
	}

	// Reject ".." and NUL bytes before touching the filesystem, so a hostile
	// path cannot cause directories to be created outside the temp base first.
	// Only the part appended to the temp base is inspected when identifiable.
	$appended = str_starts_with( $extract_to, $temp_base )
		? substr( $extract_to, strlen( $temp_base ) )
		: $extract_to;
	$segments = explode( '/', str_replace( '\\', '/', $appended ) );
	if ( in_array( '..', $segments, true ) || str_contains( $extract_to, "\0" ) ) {
		return json_encode( NodeResponse::error( 'Directory traversal attempt detected', 400 ) );
	}

	$extract_parent = dirname( $extract_to );
	// The trailing is_dir() tolerates another process creating the directory concurrently.
	if ( ! is_dir( $extract_parent ) && ! mkdir( $extract_parent, 0777, true ) && ! is_dir( $extract_parent ) ) {
		return json_encode( NodeResponse::error( 'Failed to create parent directory', 500 ) );
	}

	$real_extract_parent = realpath( $extract_parent );

	// Compare on a directory boundary: a bare prefix test would accept
	// "/tmpfoo" as being inside "/tmp".
	$base_prefix = rtrim( $real_temp_base, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR;
	$is_inside   = $real_extract_parent !== false
		&& ( $real_extract_parent === $real_temp_base || str_starts_with( $real_extract_parent, $base_prefix ) );

	if ( ! $is_inside ) {
		return json_encode( NodeResponse::error( 'Directory traversal attempt detected', 400 ) );
	}

	return true;
}
/**
 * Download a ZIP archive, extract it into the workspace and build the response.
 *
 * @param string $zip_url    URL of ZIP file.
 * @param string $extract_to Workspace root directory.
 * @param array  $params     Request parameters; reads config.target_subdir,
 *                           config.type and (via validatePrecondition) config.requires.
 * @return string|array JSON-encoded response; an error response when no files were extracted.
 * @throws Exception When target_subdir is invalid, a pre-condition fails, or the
 *                   directory, download or extraction step fails.
 */
private function performExtraction( string $zip_url, string $extract_to, array $params ) {

	$target_subdir = $params['config']['target_subdir'] ?? null;

	/* Workspace root is decided here, not by the Manager */
	$workspace_root = $extract_to;
	$extract_root   = $workspace_root;

	if ( $target_subdir ) {
		// target_subdir comes from the request: it must not climb out of the workspace.
		$is_safe = is_string( $target_subdir )
			&& ! in_array( '..', explode( '/', str_replace( '\\', '/', $target_subdir ) ), true );
		if ( ! $is_safe ) {
			throw new Exception( 'Invalid target_subdir: must be a relative path without ".." segments' );
		}
		$extract_root = rtrim( $workspace_root, '/' ) . '/' . trim( $target_subdir, '/' );
	}

	// Must run before any directory is created: "new_extraction_dir" requires
	// that the workspace does not exist yet.
	$this->validatePrecondition( $workspace_root, $params['config'] ?? [] );

	/* ensure the directory exists */
	if ( ! is_dir( $extract_root ) && ! mkdir( $extract_root, 0777, true ) ) {
		throw new Exception( "Failed to create extraction directory: $extract_root" );
	}

	$zip_path = $this->downloadZipFile( $zip_url, $workspace_root );

	try {
		$extraction_result = $this->extractZipFile( $zip_path, $extract_root, $params );
	} finally {
		// Remove the archive even when extraction throws, so a rejected
		// archive is not left behind in the workspace.
		if ( is_file( $zip_path ) ) {
			unlink( $zip_path );
		}
	}

	if ( $extraction_result['file_count'] === 0 ) {
		// Note: NodeResponse::error will set the HTTP status code in the JSON response
		// The router.worker.php will handle setting the actual HTTP status code
		return json_encode( NodeResponse::error( 'ZIP extraction failed – no files extracted', 422, [ 'files_extracted' => 0 ] ) );
	}

	touch( $workspace_root . '/.analyzed' );

	// Create component manifest
	$component_type = $params['config']['type'] ?? 'unknown';
	$structure      = $this->detectWordPressStructure( $extract_root );

	// Determine component details
	$dep_type = 'unknown';
	$dep_slug = basename( $extract_root );

	if ( $structure['is_plugin'] ) {
		$dep_type = 'plugin';
	} elseif ( $structure['is_theme'] ) {
		$dep_type = 'theme';
	} elseif ( $component_type === 'wordpress_core' ) {
		$dep_type = 'wordpress_core';
	} elseif ( str_contains( $dep_slug, 'dependency' ) ) {
		$dep_type = str_contains( $dep_slug, 'premium' ) ? 'dependency_premium' : 'dependency_free';
	}

	$manifest = [
		'type'   => $dep_type,
		'slug'   => $dep_slug,
		'is_sut' => $component_type === 'sut',
	];

	// Return the workspace root as extract_path, with proper prefix filtering
	return $this->sendUnifiedExtractionResponse( $workspace_root, $extraction_result['file_count'], $params, $manifest );
}
/**
 * Download a ZIP archive over HTTP(S) to "<extract_to>/archive.zip".
 *
 * The partially written file is removed whenever the download is rejected.
 *
 * @param string $zip_url    URL of ZIP file.
 * @param string $extract_to Directory the archive is saved into.
 * @return string Path to downloaded ZIP file.
 * @throws Exception When the file cannot be written, the transfer fails, the
 *                   archive is too large, or its MIME type is not a ZIP type.
 */
private function downloadZipFile( string $zip_url, string $extract_to ): string {

	$zip_path = $extract_to . '/archive.zip';

	$fp = fopen( $zip_path, 'wb' );
	if ( $fp === false ) {
		throw new Exception( "Cannot write archive to $zip_path" );
	}

	$ch = curl_init( $zip_url );
	if ( $ch === false ) {
		fclose( $fp );
		unlink( $zip_path );
		throw new Exception( 'Failed to initialise download' );
	}

	curl_setopt_array( $ch, [
		CURLOPT_FILE            => $fp,
		CURLOPT_FOLLOWLOCATION  => true,
		CURLOPT_MAXREDIRS       => 5,
		CURLOPT_TIMEOUT         => 300,
		// The URL is request-supplied: never let cURL read file://, gopher://, etc.,
		// neither directly nor through a redirect.
		CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
		CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
		// Abort early when the server announces an oversized body; the size is
		// checked again after the download because the header is optional.
		CURLOPT_MAXFILESIZE     => (int) self::MAX_ARCHIVE_SIZE_BYTES,
	] );

	$transfer_ok = curl_exec( $ch );
	$http_code   = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
	$curl_error  = curl_error( $ch );
	curl_close( $ch );
	fclose( $fp );

	try {
		if ( $http_code !== 200 ) {
			throw new Exception( "Failed to download ZIP (HTTP $http_code)" );
		}
		if ( $transfer_ok === false ) {
			throw new Exception( "Failed to download ZIP: $curl_error" );
		}

		clearstatcache( true, $zip_path );
		$size = filesize( $zip_path );
		if ( $size === false || $size > self::MAX_ARCHIVE_SIZE_BYTES ) {
			throw new Exception( 'Archive exceeds maximum allowed size' );
		}

		$finfo = new finfo( FILEINFO_MIME_TYPE );
		$mime  = $finfo->file( $zip_path );
		if ( ! in_array( $mime, [ 'application/zip', 'application/x-zip', 'application/octet-stream' ], true ) ) {
			throw new Exception( "Invalid MIME type for ZIP: $mime" );
		}
	} catch ( Exception $e ) {
		// Do not leave a rejected or partial download in the workspace.
		if ( is_file( $zip_path ) ) {
			unlink( $zip_path );
		}
		throw $e;
	}

	return $zip_path;
}
/**
 * Extract a ZIP archive entry by entry, validating every entry before writing it.
 *
 * Guards applied: entry-count limit, per-entry compression ratio, total declared
 * uncompressed size, path canonicalisation under $extract_root, and rejection of
 * symlinks and other non-regular entries.
 *
 * For "wordpress_core" archives a leading "wordpress/" is stripped from entry
 * names. For plugin/theme-like types a leading "<basename(target_subdir)>/" is stripped.
 *
 * @param string $zip_path     Path to ZIP file.
 * @param string $extract_root Root extraction directory.
 * @param array  $params       Parameters array; reads config.type and config.target_subdir.
 * @return array{file_count:int,extract_time:float}
 * @throws Exception If ZIP file cannot be opened or extraction fails.
 */
private function secureExtractZip( string $zip_path, string $extract_root, array $params = [] ): array {

	$zip = new ZipArchive();
	if ( $zip->open( $zip_path ) !== true ) {
		throw new Exception( 'Failed to open ZIP file' );
	}

	try {
		// phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase -- External library property
		$entry_total = $zip->numFiles;
		if ( $entry_total > self::MAX_ENTRIES ) {
			throw new Exception( 'Archive contains too many entries' );
		}

		// The prefix to strip depends only on the request, so resolve it once.
		$component_type = $params['config']['type'] ?? '';
		$target_subdir  = $params['config']['target_subdir'] ?? '';
		$strip_prefix   = null;
		if ( $component_type === 'wordpress_core' ) {
			$strip_prefix = 'wordpress/';
		} elseif ( ! empty( $target_subdir )
			&& in_array( $component_type, [ 'sut', 'plugin', 'theme', 'premium_dependency', 'free_dependency' ], true ) ) {
			// e.g. "wp-content/plugins/fortis-for-woocommerce" -> "fortis-for-woocommerce/"
			$strip_prefix = basename( $target_subdir ) . '/';
		}

		$root_prefix          = rtrim( $extract_root, '/' ) . '/';
		$total_uncompressed   = 0;
		$extracted_file_count = 0; // Files only, directories are not counted.
		$start                = microtime( true );

		for ( $i = 0; $i < $entry_total; $i++ ) {

			$stat = $zip->statIndex( $i );
			if ( $stat === false ) {
				throw new Exception( "Failed to read entry metadata at index {$i}" );
			}

			$original_name = $stat['name'];
			$name          = $original_name;

			if ( $strip_prefix !== null && str_starts_with( $name, $strip_prefix ) ) {
				$name = substr( $name, strlen( $strip_prefix ) );
				// The prefix directory entry itself has nothing left to extract.
				// Compare with '' so that an entry literally named "0" is kept.
				if ( $name === '' ) {
					continue;
				}
			}

			// Normalise & validate path
			$target_path = $this->canonicalisePath( $extract_root, $name );

			// Reject directory traversal
			if ( ! str_starts_with( $target_path, $root_prefix ) ) {
				throw new Exception( "Zip entry attempts path traversal: {$name}" );
			}

			// Reject symlinks / special files
			$this->assertRegularZipEntry( $zip, $i, $name );

			// Compression‑ratio guard
			if ( $stat['size'] > 0 && $stat['comp_size'] > 0 ) {
				$ratio = $stat['size'] / $stat['comp_size'];
				if ( $ratio > self::MAX_COMPRESSION_RATIO ) {
					throw new Exception( "Excessive compression ratio on {$name}" );
				}
			}

			$total_uncompressed += $stat['size'];
			if ( $total_uncompressed > self::MAX_UNCOMPRESSED_TOTAL_BYTES ) {
				throw new Exception( 'Total uncompressed size limit exceeded' );
			}

			// Ensure directory exists
			$dir = dirname( $target_path );
			if ( ! is_dir( $dir ) && ! mkdir( $dir, 0777, true ) ) {
				throw new Exception( "Failed to create directory: $dir" );
			}

			if ( substr( $name, -1 ) === '/' ) {
				// Directory entry
				if ( ! is_dir( $target_path ) && ! mkdir( $target_path, 0777, true ) ) {
					throw new Exception( "Failed to create directory: $target_path" );
				}
				continue; // Don't count directories as extracted files
			}

			$stream = $zip->getStream( $original_name );
			if ( ! $stream ) {
				throw new Exception( "Failed to read entry: {$original_name}" );
			}

			$out = fopen( $target_path, 'wb' );
			if ( ! $out ) {
				fclose( $stream );
				throw new Exception( "Cannot write file: {$target_path}" );
			}

			// Copy at most one byte more than the declared size: the size guards
			// above rely on the declared size, so an entry that yields more data
			// than it declared must not be written out unbounded.
			$copied = stream_copy_to_stream( $stream, $out, $stat['size'] + 1 );
			fclose( $stream );
			fclose( $out );

			if ( $copied === false || $copied > $stat['size'] ) {
				unlink( $target_path );
				throw new Exception( "Entry size mismatch on {$name}" );
			}

			chmod( $target_path, 0644 );

			++$extracted_file_count;
		}

		return [
			'file_count'   => $extracted_file_count,
			'extract_time' => microtime( true ) - $start,
		];
	} finally {
		// Single exit point for the archive handle, on success and on every failure.
		$zip->close();
	}
}

/**
 * Throw when a ZIP entry is a symlink or another non-regular file type.
 *
 * The Unix file mode is stored in the upper 16 bits of the entry's external
 * attributes. statIndex() does not expose it, so it is read with
 * getExternalAttributesIndex(). Entries without Unix mode information are accepted.
 *
 * @param ZipArchive $zip   Open archive.
 * @param int        $index Entry index.
 * @param string     $name  Entry name, used in the error message.
 * @return void
 * @throws Exception When the entry is a symlink or a special file.
 */
private function assertRegularZipEntry( ZipArchive $zip, int $index, string $name ): void {
	$opsys = 0;
	$attr  = 0;
	if ( ! $zip->getExternalAttributesIndex( $index, $opsys, $attr ) || $opsys !== ZipArchive::OPSYS_UNIX ) {
		return;
	}

	$file_type = ( $attr >> 16 ) & 0xF000; // S_IFMT: file-type bits of the Unix mode.

	if ( $file_type === 0xA000 ) { // S_IFLNK
		throw new Exception( "Zip entry is a symlink: {$name}" );
	}

	// Allow "no type recorded", regular files (S_IFREG) and directories (S_IFDIR) only.
	if ( $file_type !== 0 && $file_type !== 0x8000 && $file_type !== 0x4000 ) {
		throw new Exception( "Zip entry is a special file: {$name}" );
	}
}
'*.php'; + $resolver = new FilePathResolver( $actual_extract_path ); + $discovered_files = []; + $php_files = []; + $total_files = 0; + + $iterator = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator( $actual_extract_path, RecursiveDirectoryIterator::SKIP_DOTS ) + ); + + // Pre-calculate SUT directory prefix for filtering if this is a SUT component + $sut_prefix = null; + if ( ( $params['config']['type'] ?? '' ) === 'sut' ) { + $target_subdir = $params['config']['target_subdir'] ?? null; + $sut_prefix = $target_subdir ? trim( $target_subdir, '/' ) . '/' : null; + } + + foreach ( $iterator as $file ) { + if ( $file->isFile() ) { + $relative_path = $resolver->toRelative( $file->getPathname() ); + + // Filter files for SUT components - only include files under the SUT directory + if ( $sut_prefix !== null && ! str_starts_with( $relative_path, $sut_prefix ) ) { + continue; // Skip files not under the SUT directory + } + + ++$total_files; + + // Check if it's a PHP file for basic stats + if ( pathinfo( $file, PATHINFO_EXTENSION ) === 'php' ) { + $php_files[] = str_replace( $actual_extract_path . 
'/', '', $file->getPathname() ); + } + + // Always collect stats, but only build the heavy list if requested + if ( $list_files && fnmatch( $file_pattern, $file->getFilename() ) ) { + $priority = $this->calculateSecurityPriority( $relative_path ); + + // Calculate SHA-1 for the file + $sha1 = sha1_file( $file->getPathname() ); + + $discovered_files[] = [ + 'path' => $relative_path, + 'size' => $file->getSize(), + 'lines' => substr_count( file_get_contents( $file ), "\n" ) + 1, + 'priority' => $priority, + 'sha1' => $sha1, + ]; + } + } + } + + // Sort discovered files by priority (security-sensitive files first) + usort( $discovered_files, fn( $a, $b ) => $b['priority'] - $a['priority'] ); + + if ( empty( $php_files ) ) { + $this->log_warning( 'ZIP extracted successfully but no PHP files found', [ + 'files_extracted' => $file_count, + 'actual_path' => $actual_extract_path, + ] ); + } + + $this->log_info( 'PHP files detected', [ + 'php_files' => array_slice( $php_files, 0, 10 ), + 'count' => count( $php_files ), + ] ); + + // Send unified response with both stats and file discovery data + $response = [ + 'extract_path' => $actual_extract_path, + 'session_id' => $params['session_id'] ?? md5( $actual_extract_path ), + 'files_discovered' => $list_files ? $discovered_files : [], + 'stats' => [ + 'files_extracted' => $file_count, + 'php_files_found' => count( $php_files ), + 'total_files' => $total_files, + 'files_matching_pattern' => count( $discovered_files ), + ], + ]; + + // Add component manifest if provided + if ( ! empty( $manifest ) ) { + $response['component'] = $manifest; + } + + return json_encode( NodeResponse::success( $response ) ); + } + + + /** + * Handle extraction error + * + * @param Exception $e Exception that occurred. + * @return string JSON error response + */ + private function handleExtractionError( Exception $e ): string { + $this->log_error( 'ZIP extraction failed: ' . 
/**
 * Detect WordPress plugin or theme structure
 *
 * A directory is a plugin when a top-level *.php file contains "Plugin Name:".
 * It is a theme when style.css contains "Theme Name:", or when both
 * functions.php and index.php exist. The plugin check runs first.
 *
 * @param string $directory Directory to check.
 *
 * @return array Structure information with keys is_plugin, is_theme, type and main_file.
 */
private function detectWordPressStructure( string $directory ): array {
	$result = [
		'is_plugin' => false,
		'is_theme'  => false,
		'type'      => 'unknown',
		'main_file' => null,
	];

	// Header comments sit at the top of the file, so only the first 8 KiB are
	// read. An unreadable file is treated as having no header.
	$read_header = static function ( string $path ): string {
		if ( ! is_readable( $path ) ) {
			return '';
		}

		return (string) file_get_contents( $path, false, null, 0, 8192 );
	};

	// Check for WordPress plugin (glob() returns false on error).
	foreach ( glob( $directory . '/*.php' ) ?: [] as $file ) {
		if ( str_contains( $read_header( $file ), 'Plugin Name:' ) ) {
			$result['is_plugin'] = true;
			$result['type']      = 'plugin';
			$result['main_file'] = basename( $file );

			return $result;
		}
	}

	// Check for WordPress theme
	$stylesheet = $directory . '/style.css';
	if ( file_exists( $stylesheet ) && str_contains( $read_header( $stylesheet ), 'Theme Name:' ) ) {
		$result['is_theme']  = true;
		$result['type']      = 'theme';
		$result['main_file'] = 'style.css';

		return $result;
	}

	// Check for theme with functions.php
	if ( file_exists( $directory . '/functions.php' ) && file_exists( $directory . '/index.php' ) ) {
		$result['is_theme']  = true;
		$result['type']      = 'theme';
		$result['main_file'] = 'functions.php';
	}

	return $result;
}
/**
 * Return every top-level directory in the workspace (one level deep).
 *
 * Each entry is the directory name with a single trailing "/", and the
 * list is sorted. A base directory that is missing or cannot be read
 * yields an empty list.
 *
 * @param string $base Base directory to scan.
 * @return array Array of workspace roots.
 */
private function getWorkspaceRoots( string $base ): array {
	if ( ! is_dir( $base ) ) {
		return [];
	}

	// scandir() returns false on failure; iterating a boolean would only emit a warning.
	$entries = scandir( $base );
	if ( $entries === false ) {
		return [];
	}

	$roots = [];
	foreach ( $entries as $item ) {
		if ( $item === '.' || $item === '..' ) {
			continue;
		}
		if ( is_dir( $base . '/' . $item ) ) {
			$roots[] = rtrim( $item, '/' ) . '/'; // keep trailing "/"
		}
	}
	sort( $roots );

	return $roots;
}
/**
 * Cleanup old sessions periodically.
 *
 * Scans the "qit-code-analysis" directory under the system temp dir and
 * removes every session directory whose modification time is more than
 * one hour in the past. An entry is only touched when its resolved path
 * lies strictly inside the resolved cache directory, so a symlink that
 * points at the cache root itself or at a sibling directory sharing the
 * same name prefix is skipped. Entries whose age cannot be determined
 * are skipped as well rather than being treated as old.
 */
function cleanup_old_sessions(): void {
	$cache_dir = sys_get_temp_dir() . '/qit-code-analysis';

	if ( ! is_dir( $cache_dir ) ) {
		log_debug( 'Cache directory does not exist, skipping cleanup', [ 'path' => $cache_dir ] );

		return;
	}

	log_debug( 'Starting cleanup of old sessions', [ 'cache_dir' => $cache_dir ] );

	// Resolve the cache root once; it does not change while we iterate.
	$cache_root = realpath( $cache_dir );
	$entries    = scandir( $cache_dir );
	if ( $cache_root === false || $entries === false ) {
		log_warning( 'Unable to read cache directory, skipping cleanup', [ 'path' => $cache_dir ] );

		return;
	}

	// The trailing separator makes this a path-boundary check instead of a plain string-prefix check.
	$root_prefix = rtrim( $cache_root, '/\\' ) . DIRECTORY_SEPARATOR;
	$prefix_len  = strlen( $root_prefix );

	$now          = time();
	$dirs_scanned = 0;
	$dirs_removed = 0;
	$dirs_skipped = 0;
	$dirs_invalid = 0;

	foreach ( $entries as $dir ) {
		if ( $dir === '.' || $dir === '..' ) {
			continue;
		}

		++$dirs_scanned;
		$session_dir = $cache_dir . '/' . $dir;
		$real_path   = realpath( $session_dir );

		// Verify it's really inside cache_dir
		if ( $real_path === false || strncmp( $real_path, $root_prefix, $prefix_len ) !== 0 ) {
			log_warning( 'Skipping directory outside cache_dir', [ 'dir' => $session_dir ] );
			++$dirs_invalid;
			continue;
		}

		if ( ! is_dir( $real_path ) ) {
			continue;
		}

		$mtime = filemtime( $real_path );
		if ( $mtime === false ) {
			// Age unknown: never delete on a guess.
			log_warning( 'Skipping directory with unreadable mtime', [ 'dir' => $session_dir ] );
			++$dirs_invalid;
			continue;
		}

		$age_hours = round( ( $now - $mtime ) / 3600, 1 );

		if ( $now - $mtime > 3600 ) { // 1 hour old
			log_info( 'Removing old session directory', [
				'dir'       => $dir,
				'age_hours' => $age_hours,
			] );

			// Use PHP's recursive directory removal instead of exec
			remove_directory_safely( $real_path );
			++$dirs_removed;
		} else {
			log_debug( 'Skipping recent session directory', [
				'dir'       => $dir,
				'age_hours' => $age_hours,
			] );
			++$dirs_skipped;
		}
	}

	log_info( 'Session cleanup completed', [
		'dirs_scanned' => $dirs_scanned,
		'dirs_removed' => $dirs_removed,
		'dirs_skipped' => $dirs_skipped,
		'dirs_invalid' => $dirs_invalid,
	] );
}
/**
 * Render a textual directory tree for debug logs.
 *
 * Uses the native `tree` binary when one is on the PATH and falls back
 * to a PHP iterator otherwise. Both paths list at most $depth levels
 * below $dir and at most $max_lines lines.
 *
 * @param string $dir       Directory to render; trailing slashes are ignored.
 * @param int    $depth     Number of levels to show (values below 1 are treated as 1).
 * @param int    $max_lines Maximum number of output lines.
 * @return string The rendered tree, or an empty string when the fallback is needed and $dir is not a directory.
 */
public static function dir_tree( string $dir, int $depth = 2, int $max_lines = 400 ): string {
	$dir   = rtrim( $dir, '/\\' );
	$depth = max( 1, $depth ); // `tree -L` rejects levels below 1.

	// -- 1) Try native `tree`
	$cmd = 'command -v tree';
	if ( trim( (string) shell_exec( $cmd ) ) !== '' ) { // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.system_calls_shell_exec,WordPress.PHP.DiscouragedPHPFunctions.system_calls_shell_exec - This call is safe as-is.
		$tree_cmd = sprintf(
			'tree -a -L %d --dirsfirst %s 2>/dev/null | head -n %d',
			$depth,
			escapeshellarg( $dir ),
			$max_lines
		);
		$out = shell_exec( $tree_cmd ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.system_calls_shell_exec,WordPress.PHP.DiscouragedPHPFunctions.system_calls_shell_exec - This call is safe as-is.
		// shell_exec() yields null for empty output and false when the pipe cannot be opened; only a real string is usable.
		if ( is_string( $out ) && $out !== '' ) {
			return $out;
		}
	}

	// -- 2) Fallback: PHP iterator
	if ( ! is_dir( $dir ) ) {
		// RecursiveDirectoryIterator throws on a missing directory; a debug helper must not mask the caller's own error.
		return '';
	}

	$lines = [];
	$iter  = new \RecursiveIteratorIterator(
		new \RecursiveDirectoryIterator(
			$dir,
			\FilesystemIterator::SKIP_DOTS | \FilesystemIterator::FOLLOW_SYMLINKS
		),
		\RecursiveIteratorIterator::SELF_FIRST
	);
	// Iterator depth is zero-based (0 = direct children) while `tree -L` is one-based.
	$iter->setMaxDepth( $depth - 1 );
	foreach ( $iter as $path => $info ) {
		$rel     = substr( $path, strlen( $dir ) + 1 );
		$pad     = str_repeat( '│   ', $iter->getDepth() );
		$lines[] = $pad . ( $info->isDir() ? '├── ' : '└── ' ) . $rel;
		if ( count( $lines ) >= $max_lines ) {
			break;
		}
	}

	return implode( "\n", $lines );
}
/**
 * Append a fact to the in-memory store.
 *
 * A fact that arrives without an "id" (or with a null one) gets a
 * generated id prefixed with "fact_" before it is stored.
 *
 * @param array<string, mixed> $fact
 */
public static function add( array $fact ): void {
	$fact['id']    ??= uniqid( 'fact_', true );
	self::$facts[] = $fact;
}
/**
 * Get all facts, optionally filtered and limited.
 *
 * A fact matches when, for every filter key, it has that key set to a
 * strictly identical value. Because the check uses isset(), a filter
 * value of null never matches. The result is always a zero-indexed
 * list, with or without filters.
 *
 * @param array<string, mixed> $filters Optional filters to apply
 * @param int|null             $limit   Optional limit on number of results
 * @return array<int, array<string, mixed>>
 */
public static function list_all( array $filters = [], ?int $limit = null ): array {
	$facts = self::$facts;

	// Apply filters
	if ( ! empty( $filters ) ) {
		$matches_all = static function ( $fact ) use ( $filters ): bool {
			foreach ( $filters as $key => $value ) {
				if ( ! isset( $fact[ $key ] ) || $fact[ $key ] !== $value ) {
					return false;
				}
			}

			return true;
		};

		// array_filter() keeps the original keys; reindex so callers (and json_encode) always see a list.
		$facts = array_values( array_filter( $facts, $matches_all ) );
	}

	// Apply limit
	if ( $limit !== null ) {
		$facts = array_slice( $facts, 0, $limit );
	}

	return $facts;
}
/**
 * Convert an absolute path to a path relative to the extract directory.
 *
 * Both paths are normalized first. The extract path is stripped only
 * when it is a whole leading path component, so "/work/abc/x" is not
 * treated as living inside "/work/ab". Any other input is returned
 * with its leading slashes removed.
 */
public function to_relative( string $absolute_path ): string {
	$absolute_path = $this->normalize( $absolute_path );
	$extract_path  = $this->normalize( $this->extract_path );

	// The extract directory itself maps to the empty relative path.
	if ( $absolute_path === $extract_path ) {
		return '';
	}

	// Require a separator after the prefix so only real descendants match.
	$prefix = $extract_path . '/';
	if ( strncmp( $absolute_path, $prefix, strlen( $prefix ) ) === 0 ) {
		return ltrim( substr( $absolute_path, strlen( $prefix ) ), '/' );
	}

	// Path is already relative (or outside the workspace)
	return ltrim( $absolute_path, '/' );
}
/**
 * Convert absolute path to relative path.
 *
 * Strips the root directory only when it is the leading path component
 * of $absolute_path. Occurrences of the root string further inside the
 * path are left alone, and a path that merely shares a name prefix with
 * the root is not treated as being inside it.
 */
public function toRelative( string $absolute_path ): string {
	$root = $this->root_dir;

	if ( $root !== '' ) {
		if ( $absolute_path === $root ) {
			return '';
		}

		$prefix = $root . '/';
		if ( strncmp( $absolute_path, $prefix, strlen( $prefix ) ) === 0 ) {
			// Drop the root and any leading slash that remains.
			return ltrim( substr( $absolute_path, strlen( $prefix ) ), '/' );
		}
	}

	// Not under the root: only remove the leading slash.
	return ltrim( $absolute_path, '/' );
}
/**
 * Send a heartbeat if at least one interval has elapsed since the last one.
 *
 * Call on every poll-loop iteration. The payload reports the node token,
 * whether a busy.lock file exists in the directory named by the
 * QIT_NODE_DIR environment variable, and basic memory/load figures.
 * When QIT_NODE_DIR is unset or empty the node is reported as not busy.
 */
public function maybe_send(): void {
	$now = time();
	if ( $now - $this->last_sent < $this->interval ) {
		return;
	}
	$this->last_sent = $now;

	// getenv() returns false for an unset variable; without this guard the check would probe "/busy.lock".
	$node_dir = getenv( 'QIT_NODE_DIR' );
	$is_busy  = is_string( $node_dir ) && $node_dir !== '' && file_exists( $node_dir . '/busy.lock' );

	$data = [
		'node_token'  => $this->node_token,
		'busy'        => $is_busy ? 1 : 0,
		'last_error'  => null, // Will be populated if there's an error file
		'system_info' => [
			'memory_usage' => memory_get_usage( true ),
			'cpu_load'     => sys_getloadavg()[0] ?? null,
		],
	];

	OutboundRequest::heartbeat( $this->heartbeat_url, $data ); // Fire-and-forget, don't check result
}
/**
 * Validate inbound request data
 *
 * The request type is used as a file name under the "inbound" schema
 * directory, so it must be a plain name (letters, digits, ".", "_" and
 * "-", starting with a letter or digit). Anything else, including path
 * separators and "..", is rejected before the filesystem is touched.
 *
 * @param array  $data         Request data to validate.
 * @param string $request_type Request type (basic-prompt, vulnerability-scan, etc.).
 * @return array Validation result with 'valid' boolean and 'errors' array
 */
public function validate_inbound( array $data, string $request_type ): array {
	// Inbound types may originate from the request itself; never let them steer the schema path.
	if ( preg_match( '/^[A-Za-z0-9][A-Za-z0-9._-]*$/', $request_type ) !== 1 ) {
		return [
			'valid'  => false,
			'errors' => [ "Invalid request type: {$request_type}" ],
		];
	}

	$schema_path = $this->schemas_path . 'inbound/' . $request_type . '.json';

	return $this->validate_against_schema( $data, $schema_path );
}
file_exists( $schema_path ) ) { + $msg = "Schema file not found: {$schema_path}"; + if ( function_exists( '\\log_error' ) ) { + \log_error( 'Schema validation failed – missing schema', [ 'schema' => $schema_path ] ); + } + return [ + 'valid' => false, + 'errors' => [ $msg ], + ]; + } + + $schema_content = file_get_contents( $schema_path ); + if ( $schema_content === false ) { + return [ + 'valid' => false, + 'errors' => [ "Failed to read schema file: {$schema_path}" ], + ]; + } + + $schema = json_decode( $schema_content ); + if ( json_last_error() !== JSON_ERROR_NONE ) { + $err = "Invalid JSON in schema file {$schema_path}: " . json_last_error_msg(); + if ( function_exists( '\\log_error' ) ) { + \log_error( 'Schema validation failed – invalid schema JSON', [ 'error' => $err ] ); + } + return [ + 'valid' => false, + 'errors' => [ $err ], + ]; + } + + $validator = new Validator(); + $payload_object = json_decode( json_encode( $payload ) ); // Convert array to object + $validator->validate( $payload_object, $schema, Constraint::CHECK_MODE_TYPE_CAST ); + + if ( $validator->isValid() ) { + if ( function_exists( '\\log_debug' ) ) { + \log_debug( 'Schema validation passed', [ 'schema' => basename( $schema_path ) ] ); + } + return [ + 'valid' => true, + 'errors' => [], + ]; + } + + $errors = array_map( + fn( $e ) => "{$e['property']}: {$e['message']}", + $validator->getErrors() + ); + + if ( function_exists( '\\log_warning' ) ) { + \log_warning('Schema validation failed', [ + 'schema' => basename( $schema_path ), + 'errors' => $errors, + ]); + } + + return [ + 'valid' => false, + 'errors' => $errors, + ]; + } + + /** + * Get list of available inbound schema types + * + * @return array + */ + public function get_inbound_schema_types(): array { + return $this->get_schema_types( 'inbound' ); + } + + /** + * Get list of available outbound schema types + * + * @return array + */ + public function get_outbound_schema_types(): array { + return $this->get_schema_types( 'outbound' 
/**
 * Get schema types for a given direction
 *
 * Lists the *.json files in the schema sub-directory named by $type and
 * returns their base names without the ".json" extension. A missing
 * directory or a failed directory listing yields an empty list.
 *
 * @param string $type Schema sub-directory name (the callers in this class pass "inbound" or "outbound").
 * @return array
 */
private function get_schema_types( string $type ): array {
	$schema_dir = $this->schemas_path . $type . '/';

	if ( ! is_dir( $schema_dir ) ) {
		return [];
	}

	// glob() returns false on error, which must not reach the mapping below.
	$files = glob( $schema_dir . '*.json' );
	if ( $files === false ) {
		return [];
	}

	return array_map(
		static fn( string $file ): string => basename( $file, '.json' ),
		$files
	);
}
/**
 * Resolve a model specification to a single model name for a provider.
 *
 * A non-empty string is returned unchanged. An array is treated as a
 * map of provider name to model name and the entry for $provider is
 * returned. Any other input, an empty input, a missing provider entry
 * or an empty entry raises an exception.
 *
 * @param mixed  $model_input - Can be a string or an array with provider keys.
 * @param string $provider    - Current provider (openai, anthropic, lmstudio).
 *
 * @return string - Resolved model name
 * @throws \InvalidArgumentException When model parameter is invalid.
 */
public static function resolve_model( $model_input, string $provider ): string {
	if ( empty( $model_input ) ) {
		throw new \InvalidArgumentException( 'Model parameter is required' );
	}

	// Plain strings pass straight through (backward compatibility and simpler usage).
	if ( is_string( $model_input ) ) {
		return $model_input;
	}

	// Anything that is neither a string nor a provider map is rejected.
	if ( ! is_array( $model_input ) ) {
		throw new \InvalidArgumentException(
			'Model must be a string or an object with provider keys (e.g., {"openai": "gpt-4", "anthropic": "claude-3"})'
		);
	}

	// Multi-provider format: the current provider must have an entry.
	if ( ! isset( $model_input[ $provider ] ) ) {
		$available = implode( ', ', array_keys( $model_input ) );
		throw new \InvalidArgumentException(
			"Model not specified for provider '{$provider}'. Available: {$available}"
		);
	}

	$resolved_model = $model_input[ $provider ];
	if ( empty( $resolved_model ) ) {
		throw new \InvalidArgumentException(
			"Empty model specified for provider '{$provider}'"
		);
	}

	// NOTE(review): this file declares $allowed_models as a non-static property, so
	// self::$allowed_models does not appear to refer to it and this allow-list check
	// looks like it never runs. Confirm the intent before relying on it.
	if ( ! empty( self::$allowed_models ) && ! in_array( $resolved_model, self::$allowed_models, true ) ) {
		$available = implode( ', ', self::$allowed_models );
		throw new \InvalidArgumentException(
			"Model '{$resolved_model}' is not allowed. Available models: {$available}"
		);
	}

	return $resolved_model;
}
/**
 * Download model if needed for LM Studio
 *
 * Currently this only checks availability. When the model is not
 * reported as available, a message is written to the error log and the
 * method still returns true so the caller can continue.
 *
 * @param string $model
 *
 * @return bool Always true in the current implementation.
 */
private static function download_model_if_needed( string $model ): bool {
	// The constructor signature is ( array $config, $logger ); passing a provider name
	// as the first argument is a TypeError. With an empty config the availability check
	// falls back to its default LM Studio endpoint.
	$instance = new self( [] );
	if ( $instance->check_lm_studio_model_availability( $model ) ) {
		return true; // Model already available
	}

	// TODO: Implement actual model downloading
	// For now, we'll log that the model needs to be downloaded
	// and return true to allow the process to continue
	error_log( "Model '{$model}' not found in LM Studio. Please load it manually through LM Studio UI." ); // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log

	// In a future implementation, this could:
	// 1. Call LM Studio's model management API
	// 2. Download from Hugging Face Hub
	// 3. Use LM Studio CLI commands

	return true; // Allow process to continue for now
}
/**
 * Make sure LLPhant is installed in the install directory and its autoloader is loaded.
 *
 * Installation is serialized across PHP workers with an exclusive lock
 * on "<install_dir>.lock". When vendor/autoload.php already exists it is
 * simply required. Otherwise composer.json is written, `composer install`
 * is run, the LLPhant sources are patched and the autoloader is required.
 * The lock is released and its handle closed on every exit path,
 * including exceptions.
 *
 * @param array $composer_json Decoded composer.json contents to install.
 * @throws Exception When Composer is unavailable, the lock or install directory cannot be prepared, or the install fails.
 */
private function ensure_ll_phant_installed( array $composer_json ): void {
	// Check if composer is available. Rely on the exit status: with stderr merged into
	// stdout, a "command not found" message would otherwise look like a hit.
	$composer_found = false;
	foreach ( [ 'which composer 2>&1', 'where composer 2>&1' ] as $probe ) {
		$probe_output = [];
		$probe_code   = 1;
		exec( $probe, $probe_output, $probe_code ); // phpcs:ignore WordPress.PHP.DiscouragedPHPFunctions.system_calls_exec
		if ( $probe_code === 0 ) {
			$composer_found = true;
			break;
		}
	}
	if ( ! $composer_found ) {
		throw new Exception( 'Composer is not installed or not in PATH. Please install Composer first.' );
	}

	// Use a lock file so parallel PHP workers do not race
	$lock_path = $this->install_dir . '.lock';
	$lock      = fopen( $lock_path, 'c' );
	if ( $lock === false ) {
		throw new Exception( 'Unable to open LLPhant install lock file: ' . $lock_path );
	}

	try {
		flock( $lock, LOCK_EX );

		$autoload = $this->install_dir . '/vendor/autoload.php';
		if ( file_exists( $autoload ) ) {
			$this->log_info( 'LLPhant already installed at: ' . $this->install_dir );
			require_once $autoload;

			return;
		}

		$this->log_info( 'Installing LLPhant to: ' . $this->install_dir );

		// A previous failed install may have left the directory behind; only create it when missing.
		if ( ! is_dir( $this->install_dir ) && ! mkdir( $this->install_dir, 0755, true ) && ! is_dir( $this->install_dir ) ) {
			throw new Exception( 'Failed to create LLPhant install directory: ' . $this->install_dir );
		}

		$written = file_put_contents(
			$this->install_dir . '/composer.json',
			json_encode( $composer_json, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES )
		);
		if ( $written === false ) {
			throw new Exception( 'Failed to write composer.json to: ' . $this->install_dir );
		}

		// Run composer install
		$cmd = sprintf(
			'cd %s && composer install --no-dev --no-interaction --no-progress --ansi --ignore-platform-req=ext-gd 2>&1',
			escapeshellarg( $this->install_dir )
		);

		$output      = [];
		$return_code = 0;
		exec( $cmd, $output, $return_code );

		if ( $return_code !== 0 ) {
			throw new Exception( 'Failed to install LLPhant: ' . implode( "\n", $output ) );
		}

		// Patch file.
		$this->patch_ll_phant();

		require_once $autoload;
		$this->log_info( 'LLPhant installed successfully' );
	} finally {
		// Always hand the lock back, even when the install throws.
		flock( $lock, LOCK_UN );
		fclose( $lock );
	}
}
$this->provider ); + } + } + + private function initialize_openai( array $config ): void { + if ( ! isset( $config['api_key'] ) ) { + throw new Exception( 'OpenAI requires an API key' ); + } + + $config = array_merge( [ + 'model' => 'o4-mini-2025-04-16', // Default to o4-mini-2025-04-16, but can be overridden + ], $config ); + + // Create OpenAIConfig object + $openai_config = new OpenAIConfig(); + $openai_config->api_key = $config['api_key']; + $openai_config->model = $config['model']; + + // Set custom base URL if provided (for LM Studio compatibility) + if ( ! empty( $config['base_url'] ) ) { + $openai_config->url = $config['base_url']; + } + + $this->chat_instance = new SafeToolsOpenAIChat( $openai_config ); + } + + private function initialize_anthropic( array $config ): void { + if ( ! isset( $config['api_key'] ) ) { + throw new Exception( 'Anthropic requires an API key' ); + } + + $config = array_merge( [ + 'model' => 'claude-3-opus-20240229', + ], $config ); + + $this->chat_instance = new AnthropicChat( + $config['api_key'], + $config['model'] + ); + } + + + public function ensure_model( string $model ): bool { + if ( $this->provider === 'lmstudio' ) { + // For LM Studio, check if model is available via OpenAI-compatible API + return $this->check_lm_studio_model_availability( $model ); + } + + // For cloud providers (OpenAI, Anthropic), models are always available + return true; + } + + /** + * Check if a model is available in LM Studio via OpenAI-compatible API + * + * @param string $model Model name to check. + * + * @return bool True if model is available, false otherwise + */ + private function check_lm_studio_model_availability( string $model ): bool { + // Get base URL from config, default to LM Studio default + $base_url = $this->config['base_url'] ?? 'http://localhost:1234/v1'; + $models_endpoint = rtrim( $base_url, '/' ) . 
'/models'; + + $this->log_info( 'Checking LM Studio model availability', [ + 'model' => $model, + 'endpoint' => $models_endpoint, + ] ); + + // Make API call to check available models + $ch = curl_init( $models_endpoint ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + curl_setopt( $ch, CURLOPT_TIMEOUT, 10 ); + curl_setopt( $ch, CURLOPT_HTTPHEADER, [ + 'Content-Type: application/json', + 'Authorization: Bearer ' . ( $this->config['api_key'] ?? 'dummy' ), + ] ); + + $response = curl_exec( $ch ); + $http_code = curl_getinfo( $ch, CURLINFO_HTTP_CODE ); + $error = curl_error( $ch ); + curl_close( $ch ); + + if ( $http_code !== 200 ) { + $this->log_error( 'Failed to check LM Studio models', [ + 'http_code' => $http_code, + 'error' => $error, + 'response' => substr( $response, 0, 500 ), + ] ); + + // If we can't check, assume model is available (LM Studio might be starting up) + return true; + } + + $data = json_decode( $response, true ); + if ( ! isset( $data['data'] ) || ! is_array( $data['data'] ) ) { + $this->log_error( 'Invalid response format from LM Studio models endpoint', [ + 'response' => substr( $response, 0, 500 ), + ] ); + + // If response format is unexpected, assume model is available + return true; + } + + // Check if the requested model is in the list of available models + foreach ( $data['data'] as $available_model ) { + if ( isset( $available_model['id'] ) && $available_model['id'] === $model ) { + $this->log_info( 'Model found in LM Studio', [ 'model' => $model ] ); + + return true; + } + } + + $this->log_info( 'Model not found in LM Studio', [ + 'model' => $model, + 'available_models' => array_column( $data['data'], 'id' ), + ] ); + + // Model not found, but for LM Studio this might mean: + // 1. Model needs to be loaded through LM Studio UI + // 2. 
Model name doesn't match exactly + // We'll return true and let the actual generation call handle the error + return true; + } + + /** + * @param string $message + * @param array $context + */ + private function log_info( string $message, array $context = [] ): void { + if ( $this->logger ) { + $this->logger->log_info( $message, $context ); + } + } + + /** + * @param string $message + * @param array $context + */ + private function log_error( string $message, array $context = [] ): void { + if ( $this->logger ) { + $this->logger->log_error( $message, $context ); + } + } + + public function get_provider(): string { + return $this->provider; + } + + // Static instance management for global access + private static ?LLPhantBootstrap $instance = null; + + public static function setInstance( LLPhantBootstrap $instance ): void { + self::$instance = $instance; + } + + public static function getInstance(): ?LLPhantBootstrap { + return self::$instance; + } + + public static function getCurrentProvider(): ?string { + return self::$instance ? self::$instance->get_provider() : null; + } + + public static function getProvider(): ?string { + return self::$current_provider; + } + + public static function getModel(): ?string { + return self::$current_model; + } + + /** + * @return mixed Chat instance. 
+ */ + public static function getChat() { + return self::$chat; + } +} diff --git a/src/src/AI/webserver/Lib/OutboundRequest.php b/src/src/AI/webserver/Lib/OutboundRequest.php new file mode 100644 index 000000000..daecdfd8f --- /dev/null +++ b/src/src/AI/webserver/Lib/OutboundRequest.php @@ -0,0 +1,244 @@ + */ + private array $data; + + /** @var array */ + private array $config; + + private string $url; + private string $method; + private string $type; + + /** @var array */ + private array $default_config = [ + 'timeout' => 30, + 'max_retries' => 3, + 'retry_delay' => 1, + 'validate_ssl' => true, + 'follow_redirects' => true, + ]; + + private JsonSchemaValidator $validator; + + /** + * @param array $data + * @param array $config + */ + public function __construct( string $url, string $method, array $data = [], array $config = [], string $type = 'request' ) { + $this->url = $url; + $this->method = strtoupper( $method ); + $this->data = $data; + $this->config = array_merge( $this->default_config, $config ); + $this->type = $type; + $this->validator = JsonSchemaValidator::getInstance(); + } + + /** + * @param array $data + * @return array + */ + public static function callback( string $url, array $data = [] ): array { + $request = new self( $url, 'POST', $data, [], 'callback' ); + + return $request->send(); + } + + /** + * @param array $data + * @return array + */ + public static function task_event( string $url, array $data = [] ): array { + $request = new self( $url, 'POST', $data, [], 'task_event' ); + + return $request->send(); + } + + /** + * @param array $data + * @return array + */ + public static function heartbeat( string $url, array $data = [] ): array { + $request = new self( $url, 'POST', $data, [], 'heartbeat' ); + + return $request->send(); + } + + /** + * @param array $data + * @return array + */ + public static function node_management( string $url, array $data = [] ): array { + $request = new self( $url, 'POST', $data ); + + return $request->send(); + 
} + + /** + * @param array $payload + * @return array + */ + public static function node_registration( string $url, array $payload = [] ): array { + $request = new self( $url, 'POST', $payload ); + + return $request->send(); + } + + /** + * Send the request + * + * @return array + */ + public function send(): array { + try { + // Validate outbound data + $validation_result = $this->validate_schema( 'outbound' ); + if ( ! $validation_result['valid'] ) { + return [ + 'success' => false, + 'error' => 'Validation failed: ' . implode( ', ', $validation_result['errors'] ), + ]; + } + + // Send with retry logic + return $this->send_with_retry(); + } catch ( Exception $e ) { + return [ + 'success' => false, + 'error' => $e->getMessage(), + ]; + } + } + + /** + * @return array + */ + private function validate_schema( string $direction ): array { + if ( $direction === 'outbound' ) { + return $this->validator->validate_outbound( $this->data, $this->type ); + } else { + return $this->validator->validate_inbound( $this->data, $this->type ); + } + } + + /** + * @return array + */ + private function send_with_retry(): array { + $max_retries = $this->config['max_retries']; + $retry_delay = $this->config['retry_delay']; + $last_error = null; + + for ( $attempt = 0; $attempt <= $max_retries; $attempt++ ) { + try { + $result = $this->send_single_request(); + + if ( $result['success'] ) { + return $result; + } + + $last_error = $result['error'] ?? 
'Unknown error'; + } catch ( Exception $e ) { + $last_error = $e->getMessage(); + } + + // Don't sleep after the last attempt + if ( $attempt < $max_retries ) { + sleep( $retry_delay ); + } + } + + $result = [ + 'success' => false, + 'error' => $last_error, + ]; + + return $result; + } + + /** + * @return array + */ + private function send_single_request(): array { + $context_options = [ + 'http' => [ + 'method' => $this->method, + 'header' => [ + 'Content-Type: application/json', + 'Accept: application/json', + ], + 'content' => json_encode( $this->data ), + 'timeout' => $this->config['timeout'], + ], + 'ssl' => [ + 'verify_peer' => $this->config['validate_ssl'], + 'verify_peer_name' => $this->config['validate_ssl'], + ], + ]; + + $context = stream_context_create( $context_options ); + $result = file_get_contents( $this->url, false, $context ); + + if ( $result === false ) { + throw new Exception( 'Failed to send request' ); + } + + $response_data = json_decode( $result, true ); + if ( json_last_error() !== JSON_ERROR_NONE ) { + throw new Exception( 'Invalid JSON response: ' . json_last_error_msg() ); + } + + // Validate response if it's an array + if ( is_array( $response_data ) ) { + $validation_result = $this->validator->validate_inbound( $response_data, $this->type ); + if ( ! $validation_result['valid'] ) { + return [ + 'success' => false, + 'error' => 'Response validation failed: ' . implode( ', ', $validation_result['errors'] ), + ]; + } + } + + $response = $response_data ?? 
$result; + + return [ + 'success' => true, + 'response' => $response, + 'url' => $this->url, + 'method' => $this->method, + ]; + } + + /** + * @return array + */ + public function get_data(): array { + return $this->data; + } + + /** + * @param array $data + */ + public function set_data( array $data ): void { + $this->data = $data; + } + + /** + * @return array + */ + public function get_config(): array { + return $this->config; + } + + /** + * @param array $config + */ + public function set_config( array $config ): void { + $this->config = array_merge( $this->config, $config ); + } +} diff --git a/src/src/AI/webserver/Lib/PromptContext.php b/src/src/AI/webserver/Lib/PromptContext.php new file mode 100644 index 000000000..dae1f9e19 --- /dev/null +++ b/src/src/AI/webserver/Lib/PromptContext.php @@ -0,0 +1,32 @@ +" • {$r}", $roots ) ); + + return <<{"name":"list_files","arguments":{"directory":"."}} + to see the root listing at any time. + +──────────────── Current Roots ───────────────── +{$root_list} + +TXT; + } +} diff --git a/src/src/AI/webserver/Lib/ToolPathGuard.php b/src/src/AI/webserver/Lib/ToolPathGuard.php new file mode 100644 index 000000000..d3ef7894b --- /dev/null +++ b/src/src/AI/webserver/Lib/ToolPathGuard.php @@ -0,0 +1,110 @@ +work_dir = rtrim( str_replace( '\\', '/', $real ), '/' ); + $this->sut_dir = rtrim( $sut_dir, '/' ); + } + + /** + * Resolve user path to absolute path, trying both WP-relative and SUT-relative notation + * + * @param string $user_path The raw path coming from the LLM/tool‑call. + * @return string Absolute path to existing file + * @throws \RuntimeException On any contract violation or if file not found. 
+	 */
+	public function resolve( string $user_path ): string {
+		$user_path = ltrim( str_replace( '\\', '/', $user_path ), '/' );
+
+		// Check if path still contains unresolved placeholders
+		if ( preg_match( '/__(?:WP_ROOT|SUT_DIR|DEP_\[[^\]]+\])__/', $user_path ) ) {
+			throw new \RuntimeException(
+				"Unresolved placeholder in path: {$user_path}. " .
+				'Placeholders should have been resolved before reaching path guard.'
+			);
+		}
+
+		// ① absolute "WP‑relative" wp-content/plugins/…
+		$cand1 = "{$this->work_dir}/{$user_path}";
+
+		// ② "SUT‑relative" includes/admin.php ⇒ wpRoot/sutDir/…
+		$cand2 = "{$this->work_dir}/{$this->sut_dir}/{$user_path}";
+
+		foreach ( [ $cand1, $cand2 ] as $p ) {
+			if ( ! is_file( $p ) && ! is_dir( $p ) ) {
+				continue;
+			}
+
+			$real = realpath( $p );
+			if ( $real === false ) {
+				continue;
+			}
+			$real = str_replace( '\\', '/', $real ); // work_dir is stored with forward slashes
+
+			// Compare on a directory boundary: a bare string prefix would also accept a sibling
+			// such as "<work_dir>-other/file" reached through "../".
+			if ( $real === $this->work_dir || strpos( $real, $this->work_dir . '/' ) === 0 ) {
+				return $p;
+			}
+		}
+		throw new \RuntimeException( "Path outside workspace: {$user_path}" );
+	}
+
+	/**
+	 * @param string $path The raw path coming from the LLM/tool‑call.
+	 * @return string Normalised *relative* path
+	 * @throws \RuntimeException On any contract violation.
+	 */
+	public function normalise( string $path ): string {
+		// ① Canonicalise separators, trim whitespace
+		$path = str_replace( '\\', '/', trim( $path ) );
+
+		// ② Fast contract checks (Path‑Contract v3)
+		if ( $path === '' || $path[0] === '/' ) {
+			throw new \RuntimeException( "Path must be root‑relative (no leading '/'): {$path}" );
+		}
+		if ( str_contains( $path, '..' ) ) {
+			throw new \RuntimeException( "Path must not contain '..' segments: {$path}" );
+		}
+		if ( ! preg_match( '#^[A-Za-z0-9_/\.\-]+$#', $path ) ) {
+			throw new \RuntimeException( "Path contains invalid characters: {$path}" );
+		}
+
+		// ③ Build absolute candidate *inside* workspace
+		$candidate = $this->work_dir . '/' . $path;
+		$real      = realpath( $candidate ) ?: $this->pseudo_realpath( $candidate );
+
+		// ④ Still inside workspace?
+		if ( $real !== $this->work_dir && substr( $real, 0, strlen( $this->work_dir . 
'/' ) ) !== $this->work_dir . '/' ) { + throw new \RuntimeException( "Path escapes workspace: {$path}" ); + } + + // ⑤ Return relative + return ltrim( substr( $real, strlen( $this->work_dir ) ), '/' ); + } + + /** Fallback when file does not exist yet (string‑based realpath) */ + private function pseudo_realpath( string $path ): string { + $parts = []; + foreach ( explode( '/', preg_replace( '#/+#', '/', $path ) ) as $part ) { + if ( $part === '' || $part === '.' ) { + continue; + } + if ( $part === '..' ) { + array_pop( $parts ); + continue; } + $parts[] = $part; + } + return '/' . implode( '/', $parts ); + } +} diff --git a/src/src/AI/webserver/NodeResponse.php b/src/src/AI/webserver/NodeResponse.php new file mode 100644 index 000000000..9eaf317fb --- /dev/null +++ b/src/src/AI/webserver/NodeResponse.php @@ -0,0 +1,241 @@ + */ + private array $data; + /** @var array */ + private array $meta; + + /** + * @param array $data + * @param array $meta + */ + public function __construct( string $status, array $data = [], array $meta = [] ) { + $this->status = $status; + $this->data = $data; + $this->meta = $meta; + } + + /** + * @param array $data + * @return array + */ + public static function mark( string $status, array $data = [] ): array { + return [ + 'status' => $status, + 'data' => $data, + 'meta' => [ + 'timestamp' => time(), + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], + ]; + } + + /** + * @param array $provider_response + * @param array $additional + * @return array + */ + public static function prompt( string $result, array $provider_response = [], array $additional = [] ): array { + $current_provider = LLPhantBootstrap::getCurrentProvider() ?? 
'unknown'; + + $response = [ + 'status' => 'completed', + 'result' => $result, + 'meta' => [ + 'timestamp' => time(), + 'provider' => $current_provider, + 'provider_response' => $provider_response, + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], + ]; + + // Add provider stats if available + $provider_stats = Benchmark::extractProviderStats( $provider_response ); + if ( ! empty( $provider_stats ) ) { + $response['meta']['provider_stats'] = $provider_stats; + } + + // Merge additional data + if ( ! empty( $additional ) ) { + $response = array_merge_recursive( $response, $additional ); + } + + // Enhance with benchmark data + return Benchmark::enhanceResponse( $response ); + } + + /** + * @param array $tool_calls + * @param array $additional + * @return array + */ + public static function tool_prompt( string $result, array $tool_calls = [], array $additional = [] ): array { + $current_provider = LLPhantBootstrap::getCurrentProvider() ?? 'unknown'; + + $response = [ + 'status' => 'completed', + 'result' => $result, + 'tool_calls' => $tool_calls, + 'meta' => [ + 'timestamp' => time(), + 'provider' => $current_provider, + 'tool_count' => count( $tool_calls ), + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], + ]; + + // Merge additional data + if ( ! empty( $additional ) ) { + $response = array_merge_recursive( $response, $additional ); + } + + // Enhance with benchmark data + return Benchmark::enhanceResponse( $response ); + } + + /** + * @param array $stats + * @param array $additional + * @return array + */ + public static function extraction( string $result, array $stats = [], array $additional = [] ): array { + $response = [ + 'status' => 'completed', + 'result' => $result, + 'meta' => [ + 'timestamp' => time(), + 'extraction_stats' => $stats, + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], + ]; + + // Merge additional data + if ( ! 
empty( $additional ) ) { + $response = array_merge_recursive( $response, $additional ); + } + + // Enhance with benchmark data + return Benchmark::enhanceResponse( $response ); + } + + /** + * @param array $meta + * @return array + */ + public static function success( string $message = 'Operation completed successfully', array $meta = [] ): array { + $response = [ + 'status' => 'success', + 'message' => $message, + 'meta' => array_merge( [ + 'timestamp' => time(), + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], $meta ), + ]; + + // Enhance with benchmark data + return Benchmark::enhanceResponse( $response ); + } + + /** + * @param array $details + * @return array + */ + public static function error( string $message, int $code = 500, array $details = [] ): array { + $response = [ + 'status' => 'error', + 'message' => $message, + 'code' => $code, + 'meta' => [ + 'timestamp' => time(), + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], + ]; + + if ( ! empty( $details ) ) { + $response['details'] = $details; + } + + // Enhance with benchmark data + return Benchmark::enhanceResponse( $response ); + } + + /** + * @param array $manager_response + * @return array + */ + public static function from_manager( array $manager_response ): array { + $response = [ + 'status' => 'completed', + 'manager_response' => $manager_response, + 'meta' => [ + 'timestamp' => time(), + 'node_id' => getenv( 'QIT_NODE_ID' ) ?: 'unknown', + ], + ]; + + // Enhance with benchmark data + return Benchmark::enhanceResponse( $response ); + } +} + +// Add the missing Benchmark class for static method calls +class Benchmark { + /** + * @param array $provider_response + * @return array + */ + public static function extractProviderStats( array $provider_response ): array { + // Extract provider statistics from response + return [ + 'tokens_used' => $provider_response['tokens_used'] ?? 0, + 'model' => $provider_response['model'] ?? 
'unknown', + 'response_time' => $provider_response['response_time'] ?? 0, + ]; + } + + /** + * @param array $response + * @return array + */ + public static function enhanceResponse( array $response ): array { + // Add benchmark/performance data to response + $response['meta']['benchmark'] = [ + 'memory_usage' => memory_get_usage( true ), + 'peak_memory_usage' => memory_get_peak_usage( true ), + 'execution_time' => microtime( true ) - ( $_SERVER['REQUEST_TIME_FLOAT'] ?? microtime( true ) ), + ]; + + return $response; + } + + /** + * CamelCase alias for tool_prompt method + * + * @param string $result + * @param array $tool_calls + * @param array $model + * @param array $additional + * @return array + */ + public static function toolPrompt( string $result, array $tool_calls = [], array $model = [], array $additional = [] ): array { + $response = \QIT_AI_Webserver\NodeResponse::tool_prompt( $result, $tool_calls, array_merge( $model, $additional ) ); + return $response; + } +} diff --git a/src/src/AI/webserver/PathContextProvider.php b/src/src/AI/webserver/PathContextProvider.php new file mode 100644 index 000000000..88b50abff --- /dev/null +++ b/src/src/AI/webserver/PathContextProvider.php @@ -0,0 +1,105 @@ +work_dir = rtrim( $work_directory, '/\\' ); + $this->sut_dir = ltrim( $sut_directory, '/' ); + } + + /** + * Get path context data - same functionality as PathContextTool::do() + * + * @return array Context data with wp_root, sut, deps, dep_count, truncated. + * @throws \RuntimeException If directories don't exist. + */ + public function get_path_context(): array { + $wp_root = $this->work_dir; + $sut_path = rtrim( $wp_root . '/' . $this->sut_dir, '/' ); + $sut_slug = basename( $this->sut_dir ); + + if ( ! file_exists( $wp_root ) ) { + throw new \RuntimeException( "WP_ROOT directory does not exist: $wp_root" ); + } + + if ( ! file_exists( $sut_path ) || ! 
is_dir( $sut_path ) ) {
+			throw new \RuntimeException( "SUT directory does not exist: $sut_path" );
+		}
+
+		$deps       = [];
+		$total_deps = 0;
+		$truncated  = false;
+
+		/** Helper: shallow listing (first 50 entries, followed by an ellipsis marker) */
+		$ls1 = static function ( string $dir ): array {
+			$items = @scandir( $dir );
+			if ( $items === false ) {
+				return [];
+			}
+			$entries = [];
+			foreach ( $items as $i ) {
+				if ( $i === '.' || $i === '..' ) {
+					continue;
+				}
+				$entries[] = $i;
+				if ( count( $entries ) >= 50 ) {
+					$entries[] = '…';
+					break;
+				}
+			}
+
+			return $entries;
+		};
+
+		/** Scan wp-content/(plugins|themes) */
+		foreach ( [ 'plugins', 'themes' ] as $type_dir ) {
+			$base = $wp_root . '/wp-content/' . $type_dir;
+
+			if ( ! file_exists( $base ) ) {
+				throw new \RuntimeException( "Base directory does not exist: $base" );
+			}
+
+			foreach ( scandir( $base ) ?: [] as $slug ) {
+				if ( $slug === '.' || $slug === '..' || $slug === $sut_slug ) {
+					continue;
+				}
+
+				// Only directories are listed as dependencies. Test this before counting so that
+				// plain files (e.g. index.php) neither inflate the count nor set "truncated".
+				$path = $base . '/' . $slug;
+				if ( ! is_dir( $path ) ) {
+					continue;
+				}
+
+				++$total_deps;
+				if ( count( $deps ) >= 20 ) { // hard cap to keep JSON small
+					$truncated = true;
+					continue;
+				}
+
+				$deps[] = [
+					'slug' => $slug,
+					'type' => $type_dir === 'plugins' ? 
'plugin' : 'theme', + 'path' => $path, + 'ls' => $ls1( $path ), + ]; + } + } + + return [ + 'wp_root' => $wp_root, + 'sut' => $sut_path, + 'deps' => $deps, + 'dep_count' => $total_deps, + 'truncated' => $truncated, + ]; + } +} diff --git a/src/src/AI/webserver/ToolContext.php b/src/src/AI/webserver/ToolContext.php new file mode 100644 index 000000000..514090705 --- /dev/null +++ b/src/src/AI/webserver/ToolContext.php @@ -0,0 +1,31 @@ + */ + public $deps; + + /** @var array|null */ + public $path_context; + + /** @var \QIT_AI_Webserver\Lib\FactStore|null */ + public $fact_store; + + /** + * @param array $deps + */ + public function __construct( string $wp_root, string $sut_dir, array $deps ) { + $this->wpRoot = $wp_root; + $this->sutDir = $sut_dir; + $this->deps = $deps; + $this->path_context = null; + $this->fact_store = null; + } +} diff --git a/src/src/AI/webserver/ToolRegistry.php b/src/src/AI/webserver/ToolRegistry.php new file mode 100644 index 000000000..ab75cf7de --- /dev/null +++ b/src/src/AI/webserver/ToolRegistry.php @@ -0,0 +1,112 @@ + */ + private array $tools = []; + + public function __construct( ToolContext $context ) { + $this->context = $context; + $this->register_tools(); + } + + private function register_tools(): void { + $base_path = dirname( __DIR__ ) . 
'/Tools'; + $work_dir = $this->context->wpRoot; + $sut_dir = $this->context->sutDir; + + // Register all available tools + $tool_classes = [ + 'ListFilesTool', + 'ReadFileTool', + 'SearchStringsTool', + 'TreeDirectoryTool', + 'ParsePhpTool', + 'FindHooksTool', + 'ListFactsTool', + 'SearchFactsTool', + ]; + + foreach ( $tool_classes as $class_name ) { + $full_class = "\\QIT_AI_Webserver\\Tools\\{$class_name}"; + if ( class_exists( $full_class ) ) { + $tool = new $full_class( $work_dir, $sut_dir, $this->context ); + $this->tools[ $tool->get_name() ] = $tool; + } + } + + // Add path context to the registry (not as a tool, but for context) + $path_provider = new PathContextProvider( $work_dir, $sut_dir ); + $path_context = $path_provider->get_path_context(); + + // Store path context for use by tools if needed + $this->context->path_context = $path_context; + } + + public function register_tool( BaseTool $tool ): void { + $this->tools[ $tool->get_name() ] = $tool; + } + + public function get_tool( string $name ): ?BaseTool { + return $this->tools[ $name ] ?? null; + } + + /** + * Get all available tools + * + * @return array + */ + public function get_tools(): array { + return $this->tools; + } + + /** + * Get all available tools (camelCase alias) + * + * @return array + */ + public function getTools(): array { + return $this->get_tools(); + } + + /** + * Get tool by name (camelCase alias) + */ + public function getTool( string $name ): ?BaseTool { + return $this->get_tool( $name ); + } + + /** + * Execute a tool by name + * + * @param array $params + * @return array + */ + public function execute_tool( string $tool_name, array $params ): array { + if ( ! 
isset( $this->tools[ $tool_name ] ) ) { + throw new \InvalidArgumentException( "Tool not found: {$tool_name}" ); + } + + return $this->tools[ $tool_name ]->execute( $params ); + } +} diff --git a/src/src/AI/webserver/Tools/BaseTool.php b/src/src/AI/webserver/Tools/BaseTool.php new file mode 100644 index 000000000..dbd11cb2b --- /dev/null +++ b/src/src/AI/webserver/Tools/BaseTool.php @@ -0,0 +1,195 @@ +work_dir = rtrim( $work_directory, '/\\' ); + $this->file_path_resolver = new FilePathResolver( $this->work_dir, $sut_directory ); + $this->context = $context; + } + + /** + * Get the tool name + */ + abstract public function get_name(): string; + + /** + * Get the tool description + */ + abstract public function get_description(): string; + + /** + * Get the FunctionInfo object for LLPhant + */ + abstract public function get_function_info(): FunctionInfo; + + /** + * Get the FunctionInfo object for LLPhant (camelCase alias) + */ + public function getFunctionInfo(): FunctionInfo { + return $this->get_function_info(); + } + + /** Canonical, safe, *relative* path – throws if invalid */ + protected function safe_path( string $user_path ): string { + return $this->file_path_resolver->canon_relative( $user_path ); + } + + /** + * @param array $examples + */ + protected function base_description( string $core, array $examples = [] ): string { + if ( $this->context === null ) { + return $core; + } + $deps = array_map( fn( $d ) => $d['slug'], $this->context->deps ); + $macro_note = sprintf( + "\n\nPath placeholders:\n• __WP_ROOT__ = %s\n• __SUT_DIR__ = %s\n• __DEP_[slug]__ where slug ∈ {%s}", + $this->context->wpRoot, + $this->context->sutDir, + implode( ', ', $deps ) ?: '–' + ); + + // Add examples if provided + if ( ! empty( $examples ) ) { + $macro_note .= "\n\nExamples:"; + foreach ( $examples as $example ) { + $macro_note .= "\n• " . $example; + } + } + + return $core . $macro_note; + } + + /** + * Child classes must implement the "real" work here. 
+ * On success return ANY serialisable value. + * + * @param array $params + * @return mixed + * @throws \Throwable To trigger error envelope. + */ + abstract protected function do( array $params ); + + /** + * @param array $params + * @return array + */ + public function execute( array $params ): array { + try { + // Resolve placeholders + $resolved_params = $this->resolve_macros_in_params( $params ); + $data = $this->do( $resolved_params ); + + return [ + 'success' => true, + 'data' => $data, + 'truncated' => $data['truncated'] ?? false, + 'error' => null, + 'debug' => [], + ]; + } catch ( Exception | \Throwable $e ) { + DebugLogger::log( static::class . '_error', [ + 'args' => $params, + 'error' => $e->getMessage(), + 'tree' => DebugLogger::dir_tree( $this->work_dir, 2, 150 ), + ] ); + + return [ + 'success' => false, + 'data' => null, + 'truncated' => false, + 'error' => $e->getMessage(), + 'debug' => [ 'args' => $params ], + ]; + } + } + + /** + * Resolve macros in path-related parameters + * + * @param array $params + * @return array + */ + protected function resolve_macros_in_params( array $params ): array { + // Common path parameters that might contain macros + $path_params = [ 'file', 'directory_or_file', 'path', 'directory' ]; + + foreach ( $path_params as $param_name ) { + if ( isset( $params[ $param_name ] ) && is_string( $params[ $param_name ] ) ) { + $params[ $param_name ] = $this->resolve_macro_path( $params[ $param_name ] ); + } + } + + return $params; + } + + /** + * Resolve macro path to regular relative path + */ + protected function resolve_macro_path( string $user_path ): string { + if ( $this->context === null ) { + return $user_path; + } + + // Much simpler pattern matching with underscores + $user_path = trim( $user_path ); + + // Handle __WP_ROOT__ + if ( strpos( $user_path, '__WP_ROOT__' ) === 0 ) { + $remainder = substr( $user_path, 11 ); // length of '__WP_ROOT__' + return ltrim( $remainder, '/' ); + } + + // Handle __SUT_DIR__ + if ( 
strpos( $user_path, '__SUT_DIR__' ) === 0 ) { + $remainder = substr( $user_path, 11 ); // length of '__SUT_DIR__' + $sut_relative = $this->file_path_resolver->to_relative( $this->context->sutDir ); + + if ( empty( $remainder ) || $remainder === '/' ) { + return $sut_relative; + } else { + return $sut_relative . '/' . ltrim( $remainder, '/' ); + } + } + + // Handle __DEP_[slug]__ + if ( preg_match( '/^__DEP_\[([^\]]+)\]__(.*)$/', $user_path, $matches ) ) { + $dep_slug = $matches[1]; + $dep_path = $matches[2]; + + foreach ( $this->context->deps as $dep ) { + if ( $dep['slug'] === $dep_slug ) { + $base_path = $dep['type'] === 'plugin' + ? "wp-content/plugins/{$dep_slug}" + : "wp-content/themes/{$dep_slug}"; + + if ( empty( $dep_path ) || $dep_path === '/' ) { + return $base_path; + } else { + return $base_path . '/' . ltrim( $dep_path, '/' ); + } + } + } + throw new \InvalidArgumentException( "Unknown dependency: {$dep_slug}" ); + } + + // No placeholder found, return as-is + return $user_path; + } +} diff --git a/src/src/AI/webserver/Tools/FindHooksTool.php b/src/src/AI/webserver/Tools/FindHooksTool.php new file mode 100644 index 000000000..0a589030d --- /dev/null +++ b/src/src/AI/webserver/Tools/FindHooksTool.php @@ -0,0 +1,213 @@ +get_name(), + [ $this, 'find_hooks' ], + $this->get_description(), + $params, + [] // no required parameters + ); + } + + /** + * @param array|null $hook_names + * @param array|null $callbacks + */ + public function find_hooks( + ?string $type = null, + ?array $hook_names = null, + ?array $callbacks = null, + string $directory = '.', + int $max_results = 100, + int $max_depth = 10 + ): string { + $res = $this->execute( compact( + 'type', 'hook_names', 'callbacks', 'directory', 'max_results', 'max_depth' + ) ); + + return json_encode( $res, JSON_UNESCAPED_SLASHES ); + } + + /** + * @param array $p + * @return array + */ + protected function do( array $p ) { + $type_filter = $p['type'] ?? 
null; // action|filter|both|null
+		$hooks_filter = $p['hook_names'] ?? null; // array|null
+		$cb_filter    = $p['callbacks'] ?? null; // array|null
+		$directory    = $p['directory'] ?? '.';
+		$max_results  = $p['max_results'] ?? 100;
+		$max_depth    = $p['max_depth'] ?? 10;
+
+		$directory = $this->safe_path( $directory );
+
+		$abs_dir = $this->file_path_resolver->to_absolute( $directory );
+
+		$files   = [];
+		$results = [];
+
+		// Recursively collect PHP files
+		$this->collect_php_files( $abs_dir, $files, 0, $max_depth );
+
+		$parser = ( new ParserFactory() )->create( ParserFactory::PREFER_PHP7 );
+		$finder = new NodeFinder();
+		// One printer is enough for every callback expression.
+		$printer = new PrettyPrinter\Standard();
+
+		foreach ( $files as $filepath ) {
+			if ( count( $results ) >= $max_results ) {
+				break;
+			}
+
+			try {
+				$code = file_get_contents( $filepath );
+				if ( $code === false ) {
+					// Unreadable file: nothing to parse.
+					continue;
+				}
+				$ast = $parser->parse( $code );
+
+				if ( ! $ast ) {
+					continue;
+				}
+
+				// Find function calls
+				$func_calls = $finder->findInstanceOf( $ast, 'PhpParser\Node\Expr\FuncCall' );
+
+				foreach ( $func_calls as $call ) {
+					if ( count( $results ) >= $max_results ) {
+						break 2;
+					}
+
+					// A statically named call carries a Name node. Dynamic calls such as $fn()
+					// carry an expression and cannot be attributed to a hook function.
+					if ( ! ( $call->name instanceof \PhpParser\Node\Name ) ) {
+						continue;
+					}
+
+					// toString() gives the called name without a leading backslash.
+					$func_name = $call->name->toString();
+
+					// Only hook registrations are of interest, whatever the type filter says.
+					if ( ! in_array( $func_name, [ 'add_action', 'add_filter' ], true ) ) {
+						continue;
+					}
+
+					// Filter by type
+					if ( $type_filter === 'action' && $func_name !== 'add_action' ) {
+						continue;
+					}
+					if ( $type_filter === 'filter' && $func_name !== 'add_filter' ) {
+						continue;
+					}
+
+					$args = $call->getArgs();
+
+					if ( count( $args ) < 2 ) {
+						continue;
+					}
+
+					// Extract hook name (stays null when it is not a plain string literal)
+					$hook_name = null;
+					$callback  = null;
+
+					if ( $args[0] && $args[0]->value instanceof \PhpParser\Node\Scalar\String_ ) {
+						$hook_name = $args[0]->value->value;
+					}
+
+					// Extract callback as source text
+					if ( $args[1] ) {
+						$callback = $printer->prettyPrintExpr( $args[1]->value );
+					}
+
+					// Apply filters
+					if ( $hooks_filter && ! 
in_array( $hook_name, $hooks_filter, true ) ) { + continue; + } + + if ( $cb_filter && ! $this->callback_matches( $callback, $cb_filter ) ) { + continue; + } + + $rel_path = $this->file_path_resolver->to_relative( $filepath ); + + $results[] = [ + 'file' => $rel_path, + 'function' => $func_name, + 'hook_name' => $hook_name, + 'callback' => $callback, + 'line' => $call->getLine(), + ]; + } + } catch ( \Exception $e ) { + // Skip files that can't be parsed + continue; + } + } + + return [ + 'matches' => $results, + 'truncated' => count( $results ) >= $max_results, + ]; + } + + private function collect_php_files( string $dir, array &$files, int $current_depth, int $max_depth ): void { + if ( $current_depth >= $max_depth ) { + return; + } + + if ( ! is_dir( $dir ) ) { + return; + } + + $items = glob( $dir . '/*' ); + if ( ! $items ) { + return; + } + + foreach ( $items as $item ) { + if ( is_file( $item ) && str_ends_with( $item, '.php' ) ) { + $files[] = $item; + } elseif ( is_dir( $item ) ) { + $this->collect_php_files( $item, $files, $current_depth + 1, $max_depth ); + } + } + } + + /** + * @param array $cb_filter + */ + private function callback_matches( ?string $callback, array $cb_filter ): bool { + if ( ! 
/**
 * Tool entry point: list stored facts, optionally narrowed by category and type.
 *
 * Packs the arguments into the parameter array handed to execute() and
 * returns the result JSON-encoded with unescaped slashes.
 */
public function list_all_facts(
	int $limit = 50,
	?string $category = null,
	?string $type = null
): string {
	$arguments = [
		'limit'    => $limit,
		'category' => $category,
		'type'     => $type,
	];

	return json_encode( $this->execute( $arguments ), JSON_UNESCAPED_SLASHES );
}
/**
 * List the immediate files and sub-directories of a directory inside the work dir.
 *
 * @param array $params Tool arguments; 'directory' defaults to '.'.
 * @return array 'directory', 'files' (path/size/extension rows), 'directories',
 *               'total_files' and 'total_directories'.
 * @throws \InvalidArgumentException When the directory is missing, not a directory,
 *                                   or resolves outside the work dir.
 * @throws \RuntimeException         When the directory cannot be read.
 */
protected function do( array $params ) {
	$directory = $this->safe_path( $params['directory'] ?? '.' );

	// Normalize directory path
	$relative_dir = $directory;
	if ( $relative_dir === '.' || $relative_dir === '' ) {
		$absolute_dir = $this->work_dir;
	} else {
		$absolute_dir = $this->file_path_resolver->to_absolute( $relative_dir );
	}

	// Verify directory is within bounds
	$real_work_dir = realpath( $this->work_dir );
	$real_dir      = realpath( $absolute_dir );

	// The target must be the work dir itself or sit below it. Comparing against
	// the work dir *plus a trailing separator* stops a sibling such as
	// "/work-old" from passing as a child of "/work"; an unresolvable work dir
	// is treated as out of bounds instead of matching everything.
	$within_bounds = false;
	if ( $real_dir !== false && $real_work_dir !== false ) {
		$work_prefix   = rtrim( $real_work_dir, DIRECTORY_SEPARATOR ) . DIRECTORY_SEPARATOR;
		$within_bounds = $real_dir === $real_work_dir || strpos( $real_dir, $work_prefix ) === 0;
	}

	if ( ! $within_bounds ) {
		DebugLogger::log( 'list_files_error', [
			'reason'       => 'directory_not_found_or_outside_bounds',
			'directory'    => $directory,
			'absolute_dir' => $absolute_dir,
			'work_dir'     => $this->work_dir,
			'dir_tree'     => DebugLogger::dir_tree( $this->work_dir ),
		] );

		throw new \InvalidArgumentException( 'Directory not found or outside bounds: ' . $directory );
	}

	if ( ! is_dir( $absolute_dir ) ) {
		DebugLogger::log( 'list_files_error', [
			'reason'       => 'not_a_directory',
			'directory'    => $directory,
			'absolute_dir' => $absolute_dir,
			'work_dir'     => $this->work_dir,
			'dir_tree'     => DebugLogger::dir_tree( dirname( $absolute_dir ) ),
		] );

		throw new \InvalidArgumentException( 'Directory not found: ' . $directory );
	}

	$files = [];
	$dirs  = [];

	$items = @scandir( $absolute_dir );
	if ( $items === false ) {
		DebugLogger::log( 'list_files_error', [
			'reason'       => 'cannot_read_directory',
			'directory'    => $directory,
			'absolute_dir' => $absolute_dir,
			'work_dir'     => $this->work_dir,
			'dir_tree'     => DebugLogger::dir_tree( dirname( $absolute_dir ) ),
		] );

		throw new \RuntimeException( 'Cannot read directory: ' . $directory );
	}

	foreach ( $items as $item ) {
		if ( $item === '.' || $item === '..' ) {
			continue;
		}

		$item_path     = $absolute_dir . '/' . $item;
		$relative_path = $this->file_path_resolver->to_relative( $item_path );

		if ( is_dir( $item_path ) ) {
			$dirs[] = $relative_path;
		} else {
			$files[] = [
				'path'      => $relative_path,
				'size'      => filesize( $item_path ),
				'extension' => pathinfo( $item, PATHINFO_EXTENSION ),
			];
		}
	}

	return [
		'directory'         => $relative_dir === '' ? '.' : $relative_dir,
		'files'             => $files,
		'directories'       => $dirs,
		'total_files'       => count( $files ),
		'total_directories' => count( $dirs ),
	];
}
/**
 * Parse a PHP file and summarise its classes, functions, namespaces and use statements.
 *
 * @param array $p Tool arguments; 'file' is required.
 * @return array 'file', 'ast_info' (from extract_ast_info()) and 'success'.
 * @throws \InvalidArgumentException When the resolved path is not an existing file.
 * @throws \RuntimeException         When the file cannot be read or parsed.
 */
protected function do( array $p ) {
	$file     = $this->safe_path( $p['file'] );
	$abs_path = $this->file_path_resolver->to_absolute( $file );

	// Check the *resolved* path: the value returned by safe_path() is only
	// meaningful relative to the tool's work dir, not the process CWD.
	if ( ! is_file( $abs_path ) ) {
		throw new \InvalidArgumentException( "File does not exist: {$file}" );
	}

	try {
		$code = file_get_contents( $abs_path );
		if ( false === $code ) {
			throw new \RuntimeException( 'Failed to read PHP file' );
		}

		$parser_factory = new \PhpParser\ParserFactory();
		$parser         = $parser_factory->create( \PhpParser\ParserFactory::PREFER_PHP7 );
		$ast            = $parser->parse( $code );

		if ( ! $ast ) {
			throw new \RuntimeException( 'Failed to parse PHP file' );
		}

		// Resolve names first so the extracted info refers to fully qualified names.
		$visitor   = new \PhpParser\NodeVisitor\NameResolver();
		$traverser = new \PhpParser\NodeTraverser();
		$traverser->addVisitor( $visitor );
		$ast = $traverser->traverse( $ast );

		$info = $this->extract_ast_info( $ast );

		return [
			'file'     => $file,
			'ast_info' => $info,
			'success'  => true,
		];
	} catch ( \Exception $e ) {
		// Keep the original exception attached so the root cause stays traceable.
		throw new \RuntimeException( 'Error parsing PHP file: ' . $e->getMessage(), 0, $e );
	}
}
/**
 * Read a file, either whole or restricted to an inclusive 1-based line range.
 *
 * @param array $params 'file' (required), optional 'start_line' and 'end_line'.
 * @return array File path, content, line counts and a 'truncated' flag
 *               (plus the effective 'start_line'/'end_line' when a range was requested).
 * @throws \InvalidArgumentException For a missing file, a directory, or an empty/invalid range.
 * @throws \RuntimeException         When the file cannot be read.
 */
protected function do( array $params ) {
	$file       = $this->safe_path( $params['file'] );
	$start_line = $params['start_line'] ?? null;
	$end_line   = $params['end_line'] ?? null;

	$abs_path = $this->file_path_resolver->to_absolute( $file );

	if ( ! file_exists( $abs_path ) ) {
		throw new \InvalidArgumentException( "File does not exist: {$file}" );
	}

	if ( is_dir( $abs_path ) ) {
		throw new \InvalidArgumentException( "Path is a directory, not a file: {$file}" );
	}

	$content = file_get_contents( $abs_path );
	if ( $content === false ) {
		throw new \RuntimeException( "Failed to read file: {$file}" );
	}

	// No range requested: hand back the whole file.
	if ( null === $start_line && null === $end_line ) {
		return [
			'file'        => $file,
			'content'     => $content,
			'total_lines' => substr_count( $content, "\n" ) + 1,
			'truncated'   => false,
		];
	}

	$all_lines  = explode( "\n", $content );
	$line_count = count( $all_lines );

	// Clamp the requested range to the file; both bounds stay 1-based here.
	$first = max( 1, $start_line ?? 1 );
	$last  = min( $line_count, $end_line ?? $line_count );

	if ( $first > $last || $first > $line_count ) {
		throw new \InvalidArgumentException( 'Invalid line range' );
	}

	$selected = array_slice( $all_lines, $first - 1, $last - $first + 1 );

	return [
		'file'        => $file,
		'content'     => implode( "\n", $selected ),
		'start_line'  => $first,
		'end_line'    => $last,
		'total_lines' => $line_count,
		'truncated'   => false,
	];
}
/**
 * Run a query against the context's fact store.
 *
 * @param array $p 'query' (required) and optional 'limit' (default 20).
 * @return array Matching facts with their count, or an 'error' entry when
 *               no fact store is attached to the context.
 */
protected function do( array $p ) {
	$query = $p['query'];
	$limit = $p['limit'] ?? 20;

	$store = null;
	if ( $this->context ) {
		$store = $this->context->fact_store;
	}

	if ( ! $store ) {
		return [
			'results' => [],
			'total'   => 0,
			'error'   => 'No fact store available',
		];
	}

	$found       = $store->search( $query, $limit );
	$found_count = count( $found );

	return [
		'results'   => $found,
		'total'     => $found_count,
		'query'     => $query,
		'truncated' => $found_count >= $limit,
	];
}
/**
 * Search files below a directory for any of the given needles.
 *
 * @param array $p 'needles' (required, non-empty array), plus optional 'directory',
 *                 'file_types', 'case_sensitive', 'max_results' and 'max_depth'.
 * @return array 'results' (one row per file with matches), 'total_matches' and 'truncated'.
 * @throws \InvalidArgumentException For empty/non-array needles or a missing directory.
 */
protected function do( array $p ) {
	$needles        = $p['needles'];
	$directory      = $p['directory'] ?? '.';
	$file_types     = $p['file_types'] ?? [ 'php' ];
	$case_sensitive = $p['case_sensitive'] ?? false;
	$max_results    = $p['max_results'] ?? 100;
	$max_depth      = $p['max_depth'] ?? 10;

	if ( ! is_array( $needles ) || empty( $needles ) ) {
		throw new \InvalidArgumentException( 'needles parameter must be a non-empty array' );
	}

	$relative_dir = $this->safe_path( $directory );
	$search_root  = $this->file_path_resolver->to_absolute( $relative_dir );

	if ( ! is_dir( $search_root ) ) {
		throw new \InvalidArgumentException( "Directory does not exist: {$relative_dir}" );
	}

	// Collect files
	$candidates = [];
	$this->collect_files( $search_root, $candidates, $file_types, 0, $max_depth );

	$hits        = [];
	$match_total = 0;

	foreach ( $candidates as $candidate ) {
		// max_results caps the number of files reported, not individual matches.
		if ( count( $hits ) >= $max_results ) {
			break;
		}

		$found = $this->searchInFile( $candidate, $needles, $case_sensitive );
		if ( empty( $found ) ) {
			continue;
		}

		$hits[]       = [
			'file'    => $this->file_path_resolver->to_relative( $candidate ),
			'matches' => $found,
		];
		$match_total += count( $found );
	}

	return [
		'results'       => $hits,
		'total_matches' => $match_total,
		'truncated'     => count( $hits ) >= $max_results,
	];
}
/**
 * Find every line of a file that contains one of the needles.
 *
 * Case-insensitive matching lower-cases both sides with strtolower(). The
 * needles are folded once up front and each line once per line, rather than
 * once per line/needle pair.
 *
 * @param array $needles Strings to look for.
 * @return array One row per (line, needle) hit: 1-based 'line', trimmed 'content'
 *               and the original 'needle'. Empty when the file cannot be read.
 */
private function searchInFile( string $file_path, array $needles, bool $case_sensitive ): array {
	$content = file_get_contents( $file_path );
	if ( $content === false ) {
		return [];
	}

	// Loop-invariant: prepare the comparison form of every needle once.
	$search_needles = [];
	foreach ( $needles as $idx => $needle ) {
		$search_needles[ $idx ] = $case_sensitive ? $needle : strtolower( $needle );
	}

	$lines   = explode( "\n", $content );
	$matches = [];

	foreach ( $lines as $line_num => $line_content ) {
		$search_line = $case_sensitive ? $line_content : strtolower( $line_content );

		foreach ( $needles as $idx => $needle ) {
			if ( strpos( $search_line, $search_needles[ $idx ] ) !== false ) {
				$matches[] = [
					'line'    => $line_num + 1,
					'content' => trim( $line_content ),
					'needle'  => $needle, // Report the needle as the caller supplied it.
				];
			}
		}
	}

	return $matches;
}
/**
 * 2. HTTP – parse, authorise and rate-limit the current request.
 *
 * Responds and exits directly with 403 (bad token / non-local caller),
 * 429 (rate limited) or 400 (malformed JSON body); otherwise returns the
 * parsed request.
 *
 * @param bool $check_token True: require a matching X-Node-Token header.
 *                          False: require the request to come from localhost instead.
 * @return array 'method', 'uri' (path only), 'headers' (as received) and
 *               'input' (decoded JSON body, [] when the body is empty).
 */
function qit_http_request( bool $check_token = true ): array {
	qit_runtime_init();

	$method  = $_SERVER['REQUEST_METHOD'] ?? 'GET';
	$raw_uri = $_SERVER['REQUEST_URI'] ?? '/';
	// parse_url() yields null for a missing path and false for a malformed URL.
	$uri         = parse_url( $raw_uri, PHP_URL_PATH ) ?: '/';
	$headers     = getallheaders();
	$remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';

	// Header names are case-insensitive, so look the token up on a lower-cased copy.
	$lower_headers  = array_change_key_case( $headers, CASE_LOWER );
	$provided_token = (string) ( $lower_headers['x-node-token'] ?? '' );

	// ---- token guard --------------------------------------------------
	if ( $check_token ) {
		$expected_token = getenv( 'QIT_NODE_TOKEN' );

		// Reject when no token is configured (an empty secret must never
		// authorise an empty header) and compare in constant time.
		$token_ok = is_string( $expected_token )
			&& $expected_token !== ''
			&& hash_equals( $expected_token, $provided_token );

		if ( ! $token_ok ) {
			// Log **before** responding so we can correlate in Manager logs.
			// Only the caller-supplied value is logged; the expected secret is not.
			if ( function_exists( 'log_error' ) ) {
				\log_error('Request rejected – node token mismatch', [
					'provided_prefix' => substr( $provided_token, 0, 8 ) . '...',
					'method'          => $method,
					'uri'             => $uri,
					'remote_addr'     => $remote_addr,
				]);
			}
			http_response_code( 403 );
			echo json_encode( [ 'error' => 'Unauthorized – token mismatch' ] );
			exit;
		}
	} else {
		// For worker router, check that it's being requested from localhost (by the poller)
		if ( $remote_addr !== '127.0.0.1' && $remote_addr !== 'localhost' ) {
			if ( function_exists( 'log_warning' ) ) {
				\log_warning('Request rejected – worker router not local', [
					'remote_addr' => $remote_addr,
					'method'      => $method,
					'uri'         => $uri,
				]);
			}
			http_response_code( 403 );
			echo json_encode( [ 'error' => 'Worker can only be accessed from localhost' ] );
			exit;
		}
	}

	// ---- rate limit ---------------------------------------------------
	$key = strtolower( $method ) . '_' . trim( $uri, '/' ) . '_' . md5( $provided_token );
	// The marker file is named by a hash of the key so that request-controlled
	// URI segments ("/", "..") can never steer the path outside rate-limit/.
	$file = getenv( 'QIT_NODE_DIR' ) . '/rate-limit/' . md5( $key );
	if ( ! is_dir( dirname( $file ) ) ) {
		mkdir( dirname( $file ), 0700, true );
	}
	// NOTE(review): filemtime() has whole-second resolution, so this 5 ms
	// window only triggers in a narrow slice of each second – confirm whether
	// a finer-grained timestamp is wanted before relying on this limiter.
	if ( file_exists( $file ) && microtime( true ) - filemtime( $file ) < 0.005 ) {
		if ( function_exists( 'log_warning' ) ) {
			\log_warning('Request rate-limited', [
				'key'         => $key,
				'remote_addr' => $remote_addr,
				'method'      => $method,
				'uri'         => $uri,
			]);
		}
		http_response_code( 429 );
		echo json_encode( [ 'error' => 'Rate limited' ] );
		exit;
	}
	touch( $file );

	// ---- body ---------------------------------------------------------
	$body  = file_get_contents( 'php://input' ) ?: '';
	$input = $body === '' ? [] : json_decode( $body, true );
	if ( $body !== '' && json_last_error() !== JSON_ERROR_NONE ) {
		if ( function_exists( 'log_error' ) ) {
			\log_error('Malformed JSON received', [
				'json_error'     => json_last_error_msg(),
				'method'         => $method,
				'uri'            => $uri,
				'remote_addr'    => $remote_addr,
				'truncated_body' => substr( $body, 0, 200 ) . ( strlen( $body ) > 200 ? '…' : '' ),
			]);
		}
		http_response_code( 400 );
		echo json_encode( [ 'error' => 'Malformed JSON: ' . json_last_error_msg() ] );
		exit;
	}

	return compact( 'method', 'uri', 'headers', 'input' );
}
/**
 * Append one formatted entry to the router log file.
 *
 * Entry layout: "[UTC timestamp] [level] [Router] message", followed by the
 * JSON-encoded context when the context array is not empty.
 *
 * @param array $context Extra key/value data to append to the entry.
 */
function log_message( string $level, string $message, array $context = [] ): void {
	global $router_log_file;

	// Context is appended only when there is something to show.
	$context_suffix = [] === $context
		? ''
		: ' ' . json_encode( $context, JSON_UNESCAPED_SLASHES );

	$entry = sprintf(
		'[%s] [%s] [Router] %s%s',
		gmdate( 'Y-m-d H:i:s' ),
		$level,
		$message,
		$context_suffix
	);

	// Write to log file only (no duplication to error_log)
	file_put_contents( $router_log_file, $entry . PHP_EOL, FILE_APPEND );
}
case 'POST /process':
	// Validate an inbound task and hand it to the worker without waiting for
	// the result. Responds 202 on acceptance, 400 on validation failure,
	// 503 when another request currently holds the busy lock.
	$task = $input ?? [];

	// Check if node is busy using atomic flock to prevent race conditions
	$busy_file = getenv( 'QIT_NODE_DIR' ) . '/busy.lock';
	$fp        = fopen( $busy_file, 'w' );
	if ( false === $fp ) {
		// Without a handle there is nothing to lock; fail explicitly rather
		// than passing false on to flock().
		log_error( 'Cannot open busy lock file', [ 'busy_file' => $busy_file ] );
		http_response_code( 500 );
		echo json_encode( [ 'error' => 'Cannot open busy lock file' ] );
		break;
	}
	if ( ! flock( $fp, LOCK_EX | LOCK_NB ) ) { // Atomic check+lock
		fclose( $fp );
		http_response_code( 503 ); // Better than 409; Service Unavailable
		echo json_encode( [ 'error' => 'busy' ] );
		break;
	}

	// Hold lock briefly; the finally block releases it on every exit path,
	// including the validation failures below.
	try {
		// ── strict validation ─────────────────────────────────────────
		$allowed  = [
			'basic-prompt',
			'read-file',
			'extract-zip',
			'vulnerability-scan',
		];
		$required = [
			'basic-prompt'       => [ 'job_id', 'type', 'messages', 'model' ],
			'read-file'          => [ 'job_id', 'type', 'file', 'extract_path', 'session_id' ],
			'extract-zip'        => [ 'job_id', 'type', 'zip_url', 'session_id' ],
			'vulnerability-scan' => [ 'job_id', 'type', 'vulnerability', 'model' ],
		];

		// Validate required fields including callback_url
		foreach ( [ 'job_id', 'type', 'callback_url' ] as $key ) {
			if ( ! isset( $task[ $key ] ) ) {
				log_warning( 'Rejecting /process – missing field', [
					'field'  => $key,
					'job_id' => $task['job_id'] ?? 'unknown',
					'type'   => $task['type'] ?? 'unknown',
					'remote' => $_SERVER['REMOTE_ADDR'] ?? 'cli',
				] );
				http_response_code( 400 );
				echo json_encode( [ 'error' => "Missing required field: $key" ] );
				break 2; // Leave the foreach and the switch.
			}
		}

		// Validate callback_url format
		if ( ! filter_var( $task['callback_url'], FILTER_VALIDATE_URL ) ) {
			log_warning( 'Rejecting /process – invalid callback_url', [
				'callback_url' => $task['callback_url'],
				'job_id'       => $task['job_id'] ?? 'unknown',
			] );
			http_response_code( 400 );
			echo json_encode( [ 'error' => 'Invalid callback_url format' ] );
			break;
		}

		if ( ! in_array( $task['type'], $allowed, true ) ) {
			log_warning( 'Rejecting /process – unknown type', [
				'type'   => $task['type'],
				'job_id' => $task['job_id'] ?? 'unknown',
			] );
			http_response_code( 400 );
			echo json_encode( [ 'error' => "Unknown type: {$task['type']}" ] );
			break;
		}
		foreach ( $required[ $task['type'] ] as $key ) {
			if ( ! array_key_exists( $key, $task ) ) {
				log_warning( 'Rejecting /process – missing field for type', [
					'type'   => $task['type'],
					'field'  => $key,
					'job_id' => $task['job_id'] ?? 'unknown',
				] );
				http_response_code( 400 );
				echo json_encode( [ 'error' => "Missing required field for {$task['type']}: $key" ] );
				break 2; // Leave the foreach and the switch.
			}
		}

		// JSON Schema validation
		$validator  = JsonSchemaValidator::getInstance();
		$validation = $validator->validateInbound( $task, $task['type'] );

		if ( ! $validation['valid'] ) {
			log_warning( 'JSON Schema validation failed', [
				'errors' => $validation['errors'],
				'job_id' => $task['job_id'] ?? 'unknown',
			] );
			http_response_code( 400 );
			$error_details = implode( '; ', $validation['errors'] );
			echo json_encode( [
				'error'   => 'JSON Schema validation failed: ' . $error_details,
				'details' => $validation['errors'],
			] );
			break;
		} else {
			log_debug('Inbound validation passed', [
				'job_id' => $task['job_id'] ?? 'unknown',
				'type'   => $task['type'],
			]);
		}
		// ──────────────────────────────────────────────────────────────

		// Fire-and-forget call to worker
		$worker_url = getenv( 'QIT_WORKER_URL' );
		$ch         = curl_init( "$worker_url/run-job" );
		curl_setopt_array( $ch, [
			CURLOPT_POST           => true,
			CURLOPT_POSTFIELDS     => json_encode( $task ),
			CURLOPT_HTTPHEADER     => [
				'Content-Type: application/json',
				'X-Node-Token: ' . getenv( 'QIT_NODE_TOKEN' ),
			],
			// detach – we don't care about the reply
			CURLOPT_RETURNTRANSFER => false,
			CURLOPT_TIMEOUT_MS     => 100, // Increased from 1ms to 100ms for reliability
		] );
		curl_exec( $ch );
		if ( curl_errno( $ch ) ) {
			// With a 100 ms timeout a timeout error is expected here; it is
			// logged but does not fail the request.
			log_error( 'Forward to worker failed', [
				'curl_error' => curl_error( $ch ),
				'job_id'     => $task['job_id'] ?? 'unknown',
			] );
		}
		curl_close( $ch );

		http_response_code( 202 );
		log_info( 'Accepted task for async processing', [
			'job_id' => $task['job_id'],
			'type'   => $task['type'],
		] );
		echo json_encode( [ 'status' => 'accepted' ] );
	} finally {
		// Release the lock after the forward attempt or any early exit.
		flock( $fp, LOCK_UN );
		fclose( $fp );
	}
	break;
case 'POST /collect-logs':
	// Bundle matching log files into a gzip-compressed tar and return it
	// base64-encoded. Temp files live inside QIT_NODE_DIR and are removed
	// again on success and on failure.
	try {
		$validator = JsonSchemaValidator::getInstance();
		$inbound   = $validator->validateInbound( $input ?? [], 'collect-logs' );
		if ( ! $inbound['valid'] ) {
			log_warning('collect-logs validation failed', [
				'errors' => $inbound['errors'],
			]);
			http_response_code( 400 );
			$error_details = implode( '; ', $inbound['errors'] );
			echo json_encode( [
				'error'   => 'schema_error: ' . $error_details,
				'details' => $inbound['errors'],
			] );
			break;
		} else {
			log_debug( 'collect-logs validation passed' );
		}

		$params = $input ?? [];
		$since  = isset( $params['since'] ) ? strtotime( $params['since'] ) : null;
		$glob   = $params['glob'] ?? '*.log';

		// Same fallback helpers.php uses for the log file location:
		// QIT_LOG_FILE when set, otherwise <QIT_NODE_DIR>/qit.log.
		$log_file = getenv( 'QIT_LOG_FILE' );
		if ( empty( $log_file ) ) {
			$log_file = getenv( 'QIT_NODE_DIR' ) . '/qit.log';
		}

		$log_dir = dirname( $log_file );
		$iter    = new RecursiveIteratorIterator(
			new RecursiveDirectoryIterator( $log_dir, RecursiveDirectoryIterator::SKIP_DOTS )
		);

		$files = [];
		foreach ( $iter as $f ) {
			if ( ! $f->isFile() ) {
				continue;
			}
			if ( ! fnmatch( $glob, $f->getFilename() ) ) {
				continue;
			}
			if ( $since && $f->getMTime() < $since ) {
				continue;
			}
			$files[] = $f->getPathname();
		}

		if ( $files === [] ) {
			$payload = [
				'status'  => 'no_logs',
				'archive' => null,
			];
			$validator->validateOutbound( $payload, 'collect-logs-response' ); // assert
			echo json_encode( $payload );
			break;
		}

		$tmp_base = rtrim( getenv( 'QIT_NODE_DIR' ), '/' );

		// tempnam() creates a placeholder file; keep its name so it can be
		// removed together with the archives derived from it.
		$tmp_file = tempnam( $tmp_base, 'qit-logs-' );
		if ( false === $tmp_file ) {
			throw new \RuntimeException( 'Cannot create temporary file in ' . $tmp_base );
		}
		$tar_path = $tmp_file . '.tar';
		$gz_path  = $tar_path . '.gz';

		try {
			$tar = new PharData( $tar_path );
			foreach ( $files as $p ) {
				$tar->addFile( $p, basename( $p ) );
			}
			$tar->compress( Phar::GZ ); // Writes $gz_path next to the tar.
			unset( $tar );

			$archive = file_get_contents( $gz_path );
			if ( false === $archive ) {
				throw new \RuntimeException( 'Cannot read compressed log archive' );
			}
		} finally {
			// Always clean up: placeholder, plain tar and gzip archive.
			foreach ( [ $tmp_file, $tar_path, $gz_path ] as $leftover ) {
				if ( is_file( $leftover ) ) {
					unlink( $leftover );
				}
			}
		}

		$payload = [
			'status'  => 'ok',
			'archive' => base64_encode( $archive ),
		];
		$validator->validateOutbound( $payload, 'collect-logs-response' );

		echo json_encode( $payload );
	} catch ( Throwable $e ) {
		http_response_code( 500 );
		echo json_encode( [
			'error'  => 'collect_logs_failed',
			'detail' => $e->getMessage(),
		] );
	}
	break;
isset( $task['task_id'] ) ) { + throw new RuntimeException( 'Missing task_id in job payload' ); + } + $task_id = $task['task_id']; + $callback_url = $task['callback_url']; + $task_type = $task['type']; + + log_info( 'Starting job', [ + 'job_id' => $task_id, + 'type' => $task_type, + ] ); // Add logging + + $start = microtime( true ); + try { + $result = $endpoints[ $task_type ]->handle( $task ); // ← same endpoint map you already have + + $processing_time = round( ( microtime( true ) - $start ) * 1000 ); + + // Restore original parse/log + log_info( 'Endpoint handler result', [ + 'task_id' => $task_id, + 'type' => $task_type, + 'result_length' => strlen( $result ), + 'result_starts' => substr( $result, 0, 50 ) . '...', + 'processing_time_ms' => $processing_time, + ] ); + + $decoded_result = json_decode( $result, true ); + if ( json_last_error() !== JSON_ERROR_NONE ) { + throw new RuntimeException( 'Invalid JSON response from endpoint: ' . json_last_error_msg() ); + } + + $tool_calls = $decoded_result['_tool_calls'] ?? []; + $metadata = $decoded_result['_metadata'] ?? []; + unset( $decoded_result['_processing_time'], $decoded_result['_tool_calls'], $decoded_result['_metadata'] ); + + $ok = $callback_sender->send_callback( + $callback_url, + $task['action_id'] ?? $task_id, + $decoded_result, + (int) round( $processing_time ), + $tool_calls, + $metadata, + $task_id + ); + if ( ! $ok ) { + throw new RuntimeException( 'callback failed' ); + } + http_response_code( 200 ); + echo '{"status":"done"}'; + } catch ( \Throwable $e ) { + $callback_sender->send_error_callback( + $callback_url, + $task['action_id'] ?? $task_id, + $e->getMessage(), + $task_id + ); + http_response_code( 500 ); + echo '{"error":"' . esc_js( $e->getMessage() ) . '"}'; + } finally { + // clear busy flag so the next /process can succeed + @unlink( getenv( 'QIT_NODE_DIR' ) . 
'/busy.lock' ); + } + + log_info( 'Finished job', [ 'job_id' => $task_id ] ); // Add logging + return; +} + +http_response_code( 404 ); +echo json_encode( [ 'error' => 'Route not found on Worker' ] ); diff --git a/src/src/AI/webserver/schemas/inbound/basic-prompt.json b/src/src/AI/webserver/schemas/inbound/basic-prompt.json new file mode 100644 index 000000000..e9d5356d2 --- /dev/null +++ b/src/src/AI/webserver/schemas/inbound/basic-prompt.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Basic Prompt Request", + "type": "object", + "required": ["job_id", "type", "messages", "model", "callback_url"], + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "type": { + "type": "string", + "enum": ["basic-prompt"], + "description": "Request type identifier" + }, + "messages": { + "type": "array", + "description": "Array of chat messages", + "items": { + "type": "object", + "required": ["role", "content"], + "properties": { + "role": { + "type": "string", + "enum": ["system", "user", "assistant"], + "description": "Message role" + }, + "content": { + "type": "string", + "description": "Message content" + } + } + } + }, + "model": { + "oneOf": [ + { + "type": "string", + "description": "AI model to use for the prompt" + }, + { + "type": "object", + "description": "Provider-specific AI models", + "properties": { + "openai": { + "type": "string", + "description": "OpenAI model identifier" + }, + "anthropic": { + "type": "string", + "description": "Anthropic model identifier" + }, + "lmstudio": { + "type": "string", + "description": "LMStudio model identifier" + } + }, + "additionalProperties": false + } + ], + "description": "AI model to use for the prompt (string or provider object)" + }, + "action_id": { + "type": "string", + "description": "Unique identifier for the action" + }, + "task_id": { + "type": ["string", "null"], + "description": "Unique 
identifier for the task (manager-generated)" + }, + "callback_url": { + "type": "string", + "format": "uri", + "description": "URL to send the response back to" + }, + "response_format": { + "type": "object", + "description": "Optional response format specification", + "properties": { + "type": { + "type": "string", + "enum": ["json_schema", "text"] + }, + "schema": { + "type": "object", + "description": "JSON schema for structured responses" + } + } + }, + "options": { + "type": "object", + "description": "Additional processing options" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/inbound/collect-logs.json b/src/src/AI/webserver/schemas/inbound/collect-logs.json new file mode 100644 index 000000000..d0ebf4ef1 --- /dev/null +++ b/src/src/AI/webserver/schemas/inbound/collect-logs.json @@ -0,0 +1,10 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Collect Logs Request", + "type": "object", + "properties": { + "since": { "type": "string", "format": "date-time" }, + "glob": { "type": "string" } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/inbound/extract-zip.json b/src/src/AI/webserver/schemas/inbound/extract-zip.json new file mode 100644 index 000000000..182fdaa0b --- /dev/null +++ b/src/src/AI/webserver/schemas/inbound/extract-zip.json @@ -0,0 +1,45 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ZIP Extraction Request", + "type": "object", + "required": ["job_id", "type", "zip_url", "session_id", "callback_url"], + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "type": { + "type": "string", + "enum": ["extract-zip"], + "description": "Request type identifier" + }, + "zip_url": { + "type": "string", + "format": "uri", + "description": "URL of the ZIP file to extract" + }, + "session_id": { + "type": "string", + "description": 
"Session identifier for extraction workspace" + }, + "callback_url": { + "type": "string", + "format": "uri", + "description": "URL to send the response back to" + }, + "requires": { + "type": "string", + "enum": ["new_extraction_dir", "wordpress_on_extraction_dir"], + "description": "Precondition requirements for extraction" + }, + "action_id": { + "type": "string", + "description": "Unique identifier for the action" + }, + "task_id": { + "type": ["string","null"], + "description": "Unique task identifier" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/inbound/read-file.json b/src/src/AI/webserver/schemas/inbound/read-file.json new file mode 100644 index 000000000..17f0dfb33 --- /dev/null +++ b/src/src/AI/webserver/schemas/inbound/read-file.json @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "File Reading Request", + "type": "object", + "required": ["job_id", "type", "file", "extract_path", "session_id", "callback_url"], + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "type": { + "type": "string", + "enum": ["read-file"], + "description": "Request type identifier" + }, + "file": { + "type": "string", + "description": "Relative path to the file to read", + "pattern": "^[^\\0]*$", + "not": { + "pattern": "\\.\\." 
+ } + }, + "extract_path": { + "type": "string", + "description": "Base path where files are extracted" + }, + "session_id": { + "type": "string", + "description": "Session identifier for workspace context" + }, + "callback_url": { + "type": "string", + "format": "uri", + "description": "URL to send the response back to" + }, + "action_id": { + "type": "string", + "description": "Unique identifier for the action" + }, + "task_id": { + "type": ["string","null"], + "description": "Unique task identifier" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/inbound/vulnerability-scan.json b/src/src/AI/webserver/schemas/inbound/vulnerability-scan.json new file mode 100644 index 000000000..842912cc3 --- /dev/null +++ b/src/src/AI/webserver/schemas/inbound/vulnerability-scan.json @@ -0,0 +1,111 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Vulnerability Scan Request", + "type": "object", + "required": ["job_id", "type", "vulnerability", "model", "callback_url"], + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "type": { + "type": "string", + "enum": ["vulnerability-scan"], + "description": "Request type identifier" + }, + "vulnerability": { + "type": "object", + "description": "Vulnerability information to scan for" + }, + "model": { + "oneOf": [ + { + "type": "string", + "description": "AI model to use for vulnerability analysis" + }, + { + "type": "object", + "description": "Provider-specific AI models", + "properties": { + "openai": { + "type": "string", + "description": "OpenAI model identifier" + }, + "anthropic": { + "type": "string", + "description": "Anthropic model identifier" + }, + "lmstudio": { + "type": "string", + "description": "LMStudio model identifier" + } + }, + "additionalProperties": false + } + ], + "description": "AI model to use for vulnerability analysis (string or provider object)" + }, + 
"action_id": { + "type": "string", + "description": "Unique identifier for the action" + }, + "task_id": { + "type": ["string","null"], + "description": "Unique identifier for the task" + }, + "callback_url": { + "type": "string", + "format": "uri", + "description": "URL to send the response back to" + }, + "files": { + "type": "array", + "description": "Array of files to analyze (exactly 1 file required)", + "minItems": 1, + "maxItems": 1, + "items": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Relative path to the file" + } + } + } + }, + "plugin_meta": { + "type": "object", + "description": "Plugin metadata information", + "properties": { + "slug": { + "type": "string", + "description": "Plugin marketplace slug" + } + } + }, + "response_format": { + "type": "object", + "description": "Response format specification for structured output" + }, + "extract_path": { + "type": "string", + "description": "Path to extracted plugin files" + }, + "sut_relative_dir": { + "type": "string", + "description": "Relative directory path within the extracted files" + }, + "context": { + "type": "object", + "description": "Additional context information for the scan" + }, + "static_analysis_results": { + "type": "array", + "description": "Results from static analysis tools", + "items": { + "type": "object" + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/collect-logs-response.json b/src/src/AI/webserver/schemas/outbound/collect-logs-response.json new file mode 100644 index 000000000..e50640fd6 --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/collect-logs-response.json @@ -0,0 +1,17 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Collect Logs Response", + "type": "object", + "required": ["status", "archive"], + "properties": { + "status": { + "type": "string", + "enum": ["ok", "no_logs"] + }, + "archive": { + "type": ["string", 
"null"], + "description": "Base‑64(gzip(tar)) or null when status=no_logs" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/node-heartbeat.json b/src/src/AI/webserver/schemas/outbound/node-heartbeat.json new file mode 100644 index 000000000..4773c5011 --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/node-heartbeat.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Node Heartbeat Request", + "type": "object", + "required": ["node_token", "busy"], + "properties": { + "node_token": { + "type": "string", + "description": "Authentication token for the node" + }, + "busy": { + "type": "integer", + "enum": [0, 1], + "description": "Whether the node is currently processing a task (0=idle, 1=busy)" + }, + "last_error": { + "type": ["object", "null"], + "description": "Last error encountered by the node", + "properties": { + "error_type": { + "type": "string", + "description": "Type of error that occurred" + }, + "error_message": { + "type": "string", + "description": "Human-readable error message" + }, + "job_id": { + "type": "string", + "description": "ID of the job that encountered the error" + }, + "job_type": { + "type": "string", + "description": "Type of job that encountered the error" + } + } + }, + "system_info": { + "type": "object", + "description": "System health metrics", + "properties": { + "memory_usage": { + "type": "integer", + "description": "Current memory usage in bytes" + }, + "cpu_load": { + "type": ["number", "null"], + "description": "Current CPU load average" + } + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/node-registration.json b/src/src/AI/webserver/schemas/outbound/node-registration.json new file mode 100644 index 000000000..4dbc16311 --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/node-registration.json @@ -0,0 +1,39 @@ +{ + 
"$schema": "http://json-schema.org/draft-07/schema#", + "title": "Node Registration Request", + "type": "object", + "required": ["tunnel_url", "client_id", "endpoint", "node_token", "capabilities"], + "properties": { + "tunnel_url": { + "type": "string", + "format": "uri", + "description": "Public URL where the node can be reached (tunneled or direct)" + }, + "client_id": { + "type": "string", + "description": "Unique client identifier for the node" + }, + "endpoint": { + "type": "string", + "enum": ["/process"], + "description": "Processing endpoint path on the node" + }, + "node_token": { + "type": "string", + "description": "Authentication token for the node (64-character hex string)" + }, + "capabilities": { + "type": "array", + "description": "Node capabilities (currently empty array)", + "items": { + "type": "object" + } + }, + "node_name": { + "type": ["string", "null"], + "maxLength": 50, + "description": "Optional friendly name for the node (e.g., 'Office PC', 'Gaming Rig')" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/node-unregistration.json b/src/src/AI/webserver/schemas/outbound/node-unregistration.json new file mode 100644 index 000000000..61c7e07bc --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/node-unregistration.json @@ -0,0 +1,13 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Node Unregistration Request", + "type": "object", + "required": ["node_token"], + "properties": { + "node_token": { + "type": "string", + "description": "Authentication token for the node" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/task-callback-request-error.json b/src/src/AI/webserver/schemas/outbound/task-callback-request-error.json new file mode 100644 index 000000000..bda17b0f9 --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/task-callback-request-error.json @@ -0,0 
+1,46 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Task Callback Error Request", + "type": "object", + "required": ["action_id", "response", "processing_time", "tool_calls", "metadata"], + "properties": { + "action_id": { + "type": "string", + "description": "ID of the failed action/task" + }, + "task_id": { + "type": "string", + "description": "Task identifier" + }, + "response": { + "type": "string", + "description": "JSON-encoded error response containing error details" + }, + "processing_time": { + "type": "integer", + "enum": [0], + "description": "Processing time (always 0 for errors)" + }, + "tool_calls": { + "type": "array", + "maxItems": 0, + "description": "Empty array for error callbacks", + "items": { + "type": "object" + } + }, + "metadata": { + "type": "object", + "required": ["error"], + "properties": { + "error": { + "type": "boolean", + "enum": [true], + "description": "Flag indicating this is an error callback" + } + }, + "description": "Metadata indicating error status" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/task-callback-request-success.json b/src/src/AI/webserver/schemas/outbound/task-callback-request-success.json new file mode 100644 index 000000000..973fa81dd --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/task-callback-request-success.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Task Callback Success Request", + "type": "object", + "required": ["action_id", "response", "processing_time", "tool_calls", "metadata"], + "properties": { + "action_id": { + "type": "string", + "description": "ID of the completed action/task" + }, + "task_id": { + "type": "string", + "description": "Unique identifier of the task (deterministic)" + }, + "response": { + "type": "string", + "description": "JSON-encoded response from the task processing" + }, + "processing_time": { + "type": 
["integer", "null"], + "description": "Processing time in milliseconds" + }, + "tool_calls": { + "type": "array", + "description": "Array of tool calls made during processing", + "items": { + "type": "object" + } + }, + "metadata": { + "type": "object", + "description": "Additional metadata about the task execution" + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/AI/webserver/schemas/outbound/task-event-push-request.json b/src/src/AI/webserver/schemas/outbound/task-event-push-request.json new file mode 100644 index 000000000..20494fba5 --- /dev/null +++ b/src/src/AI/webserver/schemas/outbound/task-event-push-request.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Task Event Push Request", + "type": "object", + "required": ["state", "timestamp"], + "properties": { + "state": { + "type": "string", + "enum": ["queued", "in_progress", "succeeded", "failed", "heartbeat"], + "description": "Current state of the task" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "RFC 3339 formatted timestamp of the event" + }, + "progress": { + "type": "integer", + "minimum": 0, + "maximum": 100, + "description": "Optional progress percentage (0-100)" + }, + "payload": { + "type": "object", + "description": "Additional event data", + "properties": { + "result": { + "description": "Task result data (for succeeded events)" + } + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/src/src/Commands/AI/NodeStartCommand.php b/src/src/Commands/AI/NodeStartCommand.php new file mode 100644 index 000000000..df4af4273 --- /dev/null +++ b/src/src/Commands/AI/NodeStartCommand.php @@ -0,0 +1,644 @@ +tunnel_runner = $tunnel_runner; + $this->listener = new WebServer( true ); + $this->worker = new WebServer( true ); + $this->cache = $cache; + $this->auth = $auth; + } + + protected function configure(): void { + parent::configure(); + + 
$this->setDescription( 'Start an AI processing node' ) + ->setHelp( 'This command starts a local AI processing node that contributes to the QIT network.' ) + ->addOption( 'tunnel', null, InputOption::VALUE_OPTIONAL, 'Enable tunneling. Optionally specify the tunnel method to use. Valid options: cloudflared-docker, cloudflared-binary, cloudflared-persistent, jurassictube', 'cloudflared-docker' ) + ->addOption( 'name', null, InputOption::VALUE_OPTIONAL, 'A friendly name for this node (e.g., "Office PC", "Gaming Rig")' ) + ->addOption( 'provider', null, InputOption::VALUE_OPTIONAL, 'LLM provider (openai, lmstudio, anthropic)', 'openai' ) + ->addOption( 'api-key', null, InputOption::VALUE_OPTIONAL, 'API key for cloud providers' ) + ->addOption( 'model', null, InputOption::VALUE_OPTIONAL, 'Default model to use (e.g., o4-mini-2025-04-16, gpt-4-turbo, claude-3-opus-20240229)' ) + ->addOption( 'base-url', null, InputOption::VALUE_OPTIONAL, 'Base URL for OpenAI-compatible providers (e.g., LM Studio)' ); + } + + protected function doExecute( InputInterface $input, OutputInterface $output ): int { + // --------------------------------------------------------------------- + // 1. decide once where this run will live + // --------------------------------------------------------------------- + $run_id = gmdate( 'Ymd-His' ) . '-' . substr( bin2hex( random_bytes( 2 ) ), 0, 4 ); + $run_dir = rtrim( sys_get_temp_dir(), '/\\' ) . "/qit-node/run-$run_id/"; + $this->run_dir = $run_dir; // Store for use in heartbeat + $log_dir = $run_dir; // keep logs in the same folder + + mkdir( $run_dir, 0700, true ); + + // --------------------------------------------------------------------- + // 2. create log objects that point *inside* the run directory + // --------------------------------------------------------------------- + $this->logger = new Logger( $log_dir . 'node.log', Logger::DEBUG ); + $listener_logger = new Logger( $log_dir . 
'listener.log', Logger::DEBUG ); + $worker_logger = new Logger( $log_dir . 'worker.log', Logger::DEBUG ); + + ini_set( 'log_errors', 1 ); + ini_set( 'error_log', $log_dir . 'node.log' ); // fatal errors → node.log + ini_set( 'display_errors', 0 ); + + // optional: show paths to the user + $output->writeln( "Run directory : $run_dir" ); + $output->writeln( "Listener log : {$listener_logger->get_log_file()}" ); + $output->writeln( "Worker log : {$worker_logger->get_log_file()}" ); + + // Write initial marker & sanity-check log files + $startup_marker = '=== QIT Node boot sequence initiated at ' . gmdate( 'Y-m-d H:i:s' ) . ' ==='; + + foreach ( [ $this->logger, $listener_logger, $worker_logger ] as $lg ) { + /* @var Logger $lg */ + $ok = @file_put_contents( $lg->get_log_file(), $startup_marker . PHP_EOL, FILE_APPEND ); + if ( $ok === false ) { + $output->writeln( 'Failed to write to log file: ' . $lg->get_log_file() . '' ); + $output->writeln( 'Aborting node startup – unable to create log files. Check filesystem permissions.' 
); + + return self::FAILURE; + } + } + + // Log startup (after confirming log files work) + $this->logger->info( 'Starting QIT Node', [ + 'php_version' => PHP_VERSION, + 'os' => PHP_OS, + ] ); + + // Generate a shared token for both servers + $node_token = bin2hex( random_bytes( 32 ) ); + + // Generate a per-run secret for internal routes + $internal_token = bin2hex( random_bytes( 32 ) ); + + // Pass loggers to servers and set the shared token + $this->listener->setLogger( $listener_logger ); + $this->worker->setLogger( $worker_logger ); + $this->listener->setNodeToken( $node_token ); + $this->worker->setNodeToken( $node_token ); + + // Get provider configuration + $provider = $input->getOption( 'provider' ); + $provider_config = []; + + switch ( $provider ) { + case 'lmstudio': + // LM Studio uses OpenAI-compatible API but doesn't require API key + $provider_config['api_key'] = $input->getOption( 'api-key' ) ?: 'dummy'; // LM Studio ignores this + $provider_config['base_url'] = $input->getOption( 'base-url' ) ?: 'http://localhost:1234/v1'; + $provider_config['model'] = $input->getOption( 'model' ) ?: 'deepseek/deepseek-r1-0528-qwen3-8b'; + break; + + case 'openai': + if ( ! $input->getOption( 'api-key' ) ) { + $output->writeln( 'API key is required for ' . $provider . '' ); + + return self::FAILURE; + } + $provider_config['api_key'] = $input->getOption( 'api-key' ); + // Set default model to o4-mini-2025-04-16 if not specified + $provider_config['model'] = $input->getOption( 'model' ) ?: 'o4-mini-2025-04-16'; + // Support custom base URL for OpenAI-compatible providers + if ( $input->getOption( 'base-url' ) ) { + $provider_config['base_url'] = $input->getOption( 'base-url' ); + } + break; + + case 'anthropic': + if ( ! $input->getOption( 'api-key' ) ) { + $output->writeln( 'API key is required for ' . $provider . 
'' ); + + return self::FAILURE; + } + $provider_config['api_key'] = $input->getOption( 'api-key' ); + if ( $input->getOption( 'model' ) ) { + $provider_config['model'] = $input->getOption( 'model' ); + } + break; + + default: + $output->writeln( 'Unsupported provider: ' . $provider . '' ); + + return self::FAILURE; + } + + // Set runtime configuration + $runtime_cfg = [ + 'ai_dir' => Config::get_qit_dir() . 'ai' . DIRECTORY_SEPARATOR, + 'tmp_base' => $run_dir, // every copied router lives here + ]; + + foreach ( [ $this->listener, $this->worker ] as $srv ) { + $srv->setRuntimeConfig( $runtime_cfg ); + $srv->setProviderConfig( $provider, $provider_config ); + $srv->setNodeToken( $node_token ); // NEW + } + + // Configure listener to use router.listener.php + $this->listener->setRouterTemplate( 'router.listener.php' ); + + // Configure worker to use router.worker.php and bind only to 127.0.0.1 + $this->worker->setRouterTemplate( 'router.worker.php' ); + $this->worker->setBindLocalhostOnly(); + + // Check LM Studio availability if using LM Studio provider + if ( $provider === 'lmstudio' ) { + $output->write( 'Checking LM Studio API... ' ); + try { + // Test LM Studio connection by checking models endpoint + $base_url = $provider_config['base_url'] ?? 'http://localhost:1234/v1'; + $models_endpoint = rtrim( $base_url, '/' ) . '/models'; + + $ch = curl_init( $models_endpoint ); + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true ); + curl_setopt( $ch, CURLOPT_TIMEOUT, 5 ); + curl_setopt( $ch, CURLOPT_HTTPHEADER, [ + 'Content-Type: application/json', + 'Authorization: Bearer ' . ( $provider_config['api_key'] ?? 
'dummy' ), + ] ); + + $response = curl_exec( $ch ); + $http_code = curl_getinfo( $ch, CURLINFO_HTTP_CODE ); + $error = curl_error( $ch ); + curl_close( $ch ); + + if ( $http_code === 200 ) { + $output->writeln( '' ); + + // Check if any models are loaded + $data = json_decode( $response, true ); + if ( isset( $data['data'] ) && is_array( $data['data'] ) && count( $data['data'] ) > 0 ) { + $model_count = count( $data['data'] ); + $output->writeln( "Found {$model_count} model(s) loaded in LM Studio" ); + } else { + $output->writeln( 'No models currently loaded in LM Studio. You may need to load a model through the LM Studio UI.' ); + } + } else { + $output->writeln( '' ); + $output->writeln( 'Cannot connect to LM Studio API at ' . $base_url . '' ); + $output->writeln( 'Please ensure LM Studio is running and the API server is started.' ); + $output->writeln( 'You can start LM Studio and enable the API server in the settings.' ); + + return self::FAILURE; + } + } catch ( \Exception $e ) { + $output->writeln( '' ); + $output->writeln( 'Failed to check LM Studio: ' . $e->getMessage() . '' ); + + return self::FAILURE; + } + } else { + $output->writeln( 'Using ' . $provider . ' provider' ); + } + + // Check authentication + if ( ! $this->auth->get_manager_secret() && ! $this->auth->get_partner_auth() ) { + $output->writeln( 'You must be authenticated to start a node. Run "qit connect" first.' ); + + return self::FAILURE; + } + + // Generate environment ID + $this->env_id = bin2hex( random_bytes( 8 ) ); + + // Get node name + $node_name = $this->getNodeName( $input ); + + try { + // Get client ID + $this->client_id = $this->cache->get( 'client_id' ); // Store as class property + if ( ! $this->client_id ) { + throw new \Exception( 'Client ID not found. This should have been generated during bootstrap.' ); + } + + // We're using the shared token for both servers + + // 1. 
Start the internal worker first ― it tells us its random port + $this->worker_url = $this->worker->start(); + $output->writeln( '✓ Started worker server on ' . $this->worker_url . '' ); + + // 2. Inject that URL into the listener's environment + $this->listener->setEnvironmentVariable( 'QIT_WORKER_URL', $this->worker_url ); + $this->listener->setEnvironmentVariable( 'QIT_MANAGER_URL', get_manager_url() ); + $this->listener->setEnvironmentVariable( 'QIT_INTERNAL_TOKEN', $internal_token ); + + // 3. Now launch the listener once + $listener_url = $this->listener->start(); + $output->writeln( '✓ Started listener server on ' . $listener_url . '' ); + + // Create tunnel for the listener + if ( $input->getOption( 'tunnel' ) === 'none' ) { + $this->tunnel_url = $listener_url; + $output->writeln( '✓ No tunnel created. Using listener URL: ' . $this->tunnel_url . '' ); + } else { + $this->tunnel_runner->check_tunnel_support( $input->getOption( 'tunnel' ) ); + $this->tunnel_url = $this->tunnel_runner->start_tunnel( $listener_url, $this->env_id ); // Store as class property + $output->writeln( '✓ Created secure tunnel: ' . $this->tunnel_url . '' ); + } + + // Register with Manager now that we have the final tunnel URL + $output->writeln( 'Registering with QIT network...' ); + + $registration_data = [ + 'tunnel_url' => $this->tunnel_url, + 'client_id' => $this->client_id, + 'endpoint' => '/process', + 'node_token' => $node_token, + 'capabilities' => [], // Empty for now + 'node_name' => $node_name, + ]; + + // First, validate the payload using the internal validation endpoint + $listener_validate_url = $listener_url . '/internal/register'; + $ch = curl_init( $listener_validate_url ); + curl_setopt_array( $ch, [ + CURLOPT_POST => true, + CURLOPT_POSTFIELDS => json_encode( $registration_data ), + CURLOPT_HTTPHEADER => [ + 'Content-Type: application/json', + 'X-Internal-Token: ' . $internal_token, + 'X-Node-Token: ' . 
$node_token, + ], + CURLOPT_RETURNTRANSFER => true, + CURLOPT_TIMEOUT => 100, + ] ); + $validation_response_json = curl_exec( $ch ); + if ( $validation_response_json === false ) { + throw new \RuntimeException( 'Local validation failed: ' . curl_error( $ch ) ); + } + $validation_http_code = curl_getinfo( $ch, CURLINFO_HTTP_CODE ); + curl_close( $ch ); + + if ( $validation_http_code !== 200 ) { + throw new \RuntimeException( "Validation failed, HTTP $validation_http_code: $validation_response_json" ); + } + + $validation_response = json_decode( $validation_response_json, true ); + if ( ! $validation_response['valid'] ) { + $errors = implode( ', ', $validation_response['errors'] ?? [] ); + throw new \RuntimeException( "Registration data validation failed: $errors" ); + } + + // Validation passed, now dispatch directly to Manager + $response_json = ( new RequestBuilder( get_manager_url() . '/wp-json/cd/v1/ai-nodes/register' ) ) + ->with_method( 'POST' ) + ->with_post_body( $registration_data ) + ->with_expected_status_codes( [ 200, 201 ] ) + ->with_retry( 3 ) + ->request(); + + $response = json_decode( $response_json, true ); + + if ( ! isset( $response['node_id'] ) ) { + throw new \Exception( 'Invalid response from Manager: ' . $response_json ); + } + + $this->node_id = $response['node_id']; + $this->node_token = $node_token; + + // Store node credentials for other commands + $this->cache->set( 'active_node_id', $this->node_id, 86400 ); // 24h + $this->cache->set( 'active_node_token', $this->node_token, 86400 ); // 24h + + $output->writeln( '✓ Registered with node ID: ' . $this->node_id . 
'' ); + + // Set environment variables for the worker now that we have node credentials + $this->worker->setEnvironmentVariable( 'QIT_NODE_ID', $this->node_id ); + $this->worker->setEnvironmentVariable( 'QIT_NODE_TOKEN', $this->node_token ); + $this->worker->setEnvironmentVariable( 'QIT_MANAGER_URL', get_manager_url() ); + + // Clear any stale busy.lock file at startup + @unlink( $run_dir . '/busy.lock' ); + + if ( $node_name ) { + $output->writeln( '✓ Node name: ' . $node_name . '' ); + } + + // Note: Model preloading is not needed for supported providers + // - LM Studio loads models interactively through its UI + // - OpenAI and Anthropic are cloud-based (models always available) + // - Model availability is checked during actual inference calls + + $output->writeln( '' ); + $output->writeln( 'Node started successfully!' ); + + // Handle keeping the process running + if ( extension_loaded( 'pcntl' ) ) { + $output->writeln( 'Press Ctrl+C to stop the node.' ); + + pcntl_signal( SIGINT, function () use ( $output ) { + $output->writeln( "\nShutting down node..." ); + $this->heartbeat_running = false; + $this->cleanup( $output ); + exit( 0 ); + } ); + } else { + $output->writeln( 'Press Ctrl+C to stop the node (or close the terminal window).' ); + } + + // Keep the process running with heartbeat + // This loop works on all platforms (Windows and Unix) + while ( $this->heartbeat_running ) { + // Dispatch signals on platforms that support it + if ( extension_loaded( 'pcntl' ) ) { + pcntl_signal_dispatch(); + } + + $this->sendHeartbeat( $output ); + sleep( 10 ); // Check every 10 seconds + } + } catch ( \Exception $e ) { + $output->writeln( 'Failed to start node: ' . $e->getMessage() . 
/**
 * Resolve the display name for this node.
 *
 * A non-empty `name` option wins: it is trimmed, capped at 50 characters and
 * cached for 30 days. Otherwise a previously cached name is returned.
 *
 * @param InputInterface $input Command input carrying the optional `name` option.
 *
 * @return string|null The node name, or null when none was provided or cached.
 */
private function getNodeName( InputInterface $input ): ?string {
	$name = $input->getOption( 'name' );

	if ( is_string( $name ) ) {
		$name = trim( $name );

		// A whitespace-only name is treated as "not provided" instead of being cached as ''.
		if ( $name !== '' ) {
			// Cut on character boundaries when mbstring is available so a multibyte
			// character is never split in half; fall back to the byte-wise cut otherwise.
			$name = function_exists( 'mb_substr' )
				? mb_substr( $name, 0, 50, 'UTF-8' )
				: substr( $name, 0, 50 );

			// Cache it for future runs.
			$this->cache->set( 'node_display_name', $name, 86400 * 30 ); // 30 days

			return $name;
		}
	}

	// Fall back to a name cached by an earlier run.
	$cached_name = $this->cache->get( 'node_display_name' );
	if ( $cached_name ) {
		return $cached_name;
	}

	// No name provided or cached.
	return null;
}

/**
 * Send one heartbeat to the Manager.
 *
 * Reports the busy flag (presence of `busy.lock` in the run directory), basic
 * system metrics and, when present, the contents of the last-error file, which
 * is deleted after being read. Failures are logged (and echoed in verbose mode)
 * but never propagated, so the calling loop keeps running.
 *
 * @param OutputInterface $output Console output used for verbose diagnostics.
 */
private function sendHeartbeat( OutputInterface $output ): void {
	if ( ! $this->node_id || ! $this->node_token ) {
		$this->logger->warning( 'Skipping heartbeat - node_id or node_token not set' );

		return;
	}

	try {
		$this->logger->info( 'Preparing to send heartbeat', [
			'node_id' => $this->node_id,
			'time'    => gmdate( 'Y-m-d H:i:s' ),
		] );

		// Built once; reused for the request and for the log entries below.
		$endpoint = get_manager_url() . '/wp-json/cd/v1/ai-nodes/' . $this->node_id . '/heartbeat';

		// Collect health metrics.
		$error_file = sys_get_temp_dir() . '/qit-node-last-error.json';
		$last_error = null;

		$memory_usage = memory_get_usage( true );

		// sys_getloadavg() is not implemented on Windows; calling it there raises an
		// Error that the \Exception handlers below would not catch.
		$load_average = function_exists( 'sys_getloadavg' ) ? sys_getloadavg() : false;
		$cpu_load     = is_array( $load_average ) ? ( $load_average[0] ?? null ) : null;

		$this->logger->debug( 'Collected system metrics', [
			'memory_usage'    => $memory_usage,
			'memory_usage_mb' => round( $memory_usage / 1024 / 1024, 2 ) . ' MB',
			'cpu_load'        => $cpu_load,
		] );

		if ( file_exists( $error_file ) ) {
			$error_content = file_get_contents( $error_file );

			if ( $error_content === false ) {
				// Unreadable file: report nothing rather than feeding `false` to json_decode().
				$this->logger->warning( 'Failed to read error file', [
					'file' => $error_file,
				] );
			} else {
				$last_error = json_decode( $error_content, true );

				if ( json_last_error() !== JSON_ERROR_NONE ) {
					$this->logger->warning( 'Failed to parse error file JSON', [
						'error'   => json_last_error_msg(),
						'file'    => $error_file,
						'content' => substr( $error_content, 0, 200 ) . '...',
					] );
				} else {
					$this->logger->info( 'Found error to report in heartbeat', [
						'error_type'    => $last_error['error_type'] ?? 'unknown',
						'error_message' => $last_error['error_message'] ?? 'unknown',
						'job_id'        => $last_error['job_id'] ?? 'not provided',
						'job_type'      => $last_error['job_type'] ?? 'unknown',
					] );
				}
			}

			// Debug: show error in verbose mode.
			if ( $output->isVerbose() && $last_error ) {
				$output->writeln( 'Last error: ' . ( $last_error['error_message'] ?? 'Unknown error' ) . '' );
				if ( ! empty( $last_error['job_id'] ) ) {
					$output->writeln( ' - Job ID: ' . $last_error['job_id'] );
				}
			}

			// Clear the error after reading so it is reported only once.
			unlink( $error_file );
			$this->logger->debug( 'Cleared error file after reading' );
		} else {
			$this->logger->debug( 'No error file found for heartbeat' );
		}

		// Check busy status for heartbeat.
		$busy = file_exists( $this->run_dir . '/busy.lock' ) ? 1 : 0;

		$heartbeat_data = [
			'node_token'  => $this->node_token,
			'busy'        => $busy,
			'last_error'  => $last_error,
			'system_info' => [
				'memory_usage' => $memory_usage,
				'cpu_load'     => $cpu_load,
			],
		];

		$this->logger->debug( 'Sending heartbeat request', [
			'endpoint'   => $endpoint,
			'has_error'  => $last_error !== null ? 'yes' : 'no',
			'has_job_id' => ! empty( $last_error['job_id'] ) ? 'yes' : 'no',
		] );

		$start_time = microtime( true );

		try {
			$response_json = ( new RequestBuilder( $endpoint ) )
				->with_method( 'POST' )
				->with_post_body( $heartbeat_data )
				->with_expected_status_codes( [ 200, 201 ] )
				->request();

			$request_time = microtime( true ) - $start_time;
			$response     = json_decode( $response_json, true );

			$this->logger->info( 'Heartbeat sent successfully', [
				'response_time_ms' => round( $request_time * 1000, 2 ),
				'next_heartbeat'   => $response['next_heartbeat'] ?? 60,
				'status'           => $response['status'] ?? 'unknown',
			] );

			if ( $output->isVeryVerbose() ) {
				$output->writeln( '[' . gmdate( 'H:i:s' ) . '] Heartbeat sent successfully' );
				if ( $last_error ) {
					$output->writeln( ' - Reported error: ' . ( $last_error['error_message'] ?? 'Unknown' ) );
					if ( ! empty( $last_error['job_id'] ) ) {
						$output->writeln( ' - Job error updated for: ' . $last_error['job_id'] );
					}
				}
			}
		} catch ( NetworkErrorException $e ) {
			// This is what RequestBuilder throws.
			$this->logger->error( 'Heartbeat request failed', [
				'error'    => $e->getMessage(),
				'code'     => $e->getCode(),
				'endpoint' => $endpoint,
			] );

			if ( $output->isVerbose() ) {
				$output->writeln( 'Heartbeat failed: ' . $e->getMessage() . '' );
			}
		}
	} catch ( \Exception $e ) {
		$this->logger->error( 'Unexpected heartbeat error', [
			'error' => $e->getMessage(),
			'class' => get_class( $e ),
			'trace' => $e->getTraceAsString(),
		] );

		if ( $output->isVerbose() ) {
			$output->writeln( 'Heartbeat failed unexpectedly: ' . $e->getMessage() . '' );
		}
	}
}

/**
 * Tear the node down: unregister from the Manager, remove `busy.lock`, stop the
 * worker, listener and tunnel, and drop the cached node credentials.
 *
 * Unregistering and each stop step are best-effort: a failure is logged and the
 * remaining steps still run, so one broken step cannot leave the other child
 * processes or the tunnel running.
 *
 * @param OutputInterface $output Console output used to report the unregister result.
 */
private function cleanup( OutputInterface $output ): void {
	$this->logger->info( 'Starting node cleanup process' );

	// Unregister from Manager.
	if ( $this->node_id && $this->node_token ) {
		$this->logger->info( 'Unregistering node from QIT network', [
			'node_id' => $this->node_id,
		] );

		try {
			$unregistration_data = [
				'node_token' => $this->node_token,
			];

			// Validate outbound unregistration request against schema.
			$validator  = \QIT_AI_Webserver\Lib\JsonSchemaValidator::getInstance();
			$validation = $validator->validateOutbound( $unregistration_data, 'node-unregistration' );

			if ( ! $validation['valid'] ) {
				$this->logger->warning( 'Outbound node unregistration validation failed', [
					'errors' => $validation['errors'],
				] );
				// Continue anyway to maintain backward compatibility, but log the issue.
			}

			$start_time = microtime( true );
			( new RequestBuilder( get_manager_url() . '/wp-json/cd/v1/ai-nodes/' . $this->node_id . '/unregister' ) )
				->with_method( 'POST' )
				->with_post_body( $unregistration_data )
				->with_expected_status_codes( [ 200, 201 ] )
				->request();
			$request_time = microtime( true ) - $start_time;

			$this->logger->info( 'Node unregistered successfully', [
				'response_time_ms' => round( $request_time * 1000, 2 ),
			] );

			$output->writeln( '✓ Unregistered from QIT network' );
		} catch ( \Throwable $e ) {
			// \Throwable rather than \Exception: an Error (e.g. the validator class
			// failing to load) must not abort the rest of the teardown.
			$this->logger->error( 'Failed to unregister node', [
				'error' => $e->getMessage(),
				'trace' => $e->getTraceAsString(),
			] );

			$output->writeln( 'Failed to unregister: ' . $e->getMessage() . '' );
		}
	} else {
		$this->logger->warning( 'Skipping unregister - node_id or node_token not set' );
	}

	// Clear busy.lock file on shutdown.
	if ( ! empty( $this->run_dir ) ) {
		@unlink( $this->run_dir . '/busy.lock' );
		$this->logger->debug( 'Cleared busy.lock file on shutdown' );
	}

	// Stop worker server.
	try {
		$this->logger->info( 'Stopping worker server' );
		$this->worker->stop();
		$this->logger->debug( 'Worker server stopped' );
	} catch ( \Throwable $e ) {
		$this->logger->error( 'Failed to stop worker server', [
			'error' => $e->getMessage(),
		] );
	}

	// Stop listener server.
	try {
		$this->logger->info( 'Stopping listener server' );
		$this->listener->stop();
		$this->logger->debug( 'Listener server stopped' );
	} catch ( \Throwable $e ) {
		$this->logger->error( 'Failed to stop listener server', [
			'error' => $e->getMessage(),
		] );
	}

	// Stop tunnel.
	if ( $this->env_id ) {
		try {
			$this->logger->info( 'Stopping tunnel', [ 'env_id' => $this->env_id ] );
			$this->tunnel_runner->stop_tunnel( $this->env_id );
			$this->logger->debug( 'Tunnel stopped' );
		} catch ( \Throwable $e ) {
			$this->logger->error( 'Failed to stop tunnel', [
				'error' => $e->getMessage(),
			] );
		}
	} else {
		$this->logger->debug( 'No tunnel to stop (env_id not set)' );
	}

	// Clear cached node info.
	$this->logger->debug( 'Clearing cached node info' );
	$this->cache->delete( 'active_node_id' );
	$this->cache->delete( 'active_node_token' );

	$this->logger->info( 'Node cleanup completed' );
}
/**
 * Severity rank of each log level. A larger number means a more severe level;
 * a message is recorded when its rank is at least the rank of the threshold.
 *
 * @var array<string, int>
 */
private array $log_level_priorities = [
	self::DEBUG    => 0,
	self::INFO     => 1,
	self::WARNING  => 2,
	self::ERROR    => 3,
	self::CRITICAL => 4,
];

/**
 * Constructor.
 *
 * @param string|null $log_file  Optional. The log file path. If null, `qit-node.log` in the system's temporary directory is used.
 * @param string      $log_level Optional. The minimum log level to record. Defaults to the INFO level.
 */
public function __construct( ?string $log_file = null, string $log_level = self::INFO ) {
	// Fall back to a file in the system's temporary directory.
	$this->log_file  = $log_file ?? sys_get_temp_dir() . '/qit-node.log';
	$this->log_level = $log_level;

	// Write a header only when this call is the one creating the file.
	if ( ! file_exists( $this->log_file ) ) {
		$this->write_to_log( '=== QIT Node Log Started at ' . gmdate( 'Y-m-d H:i:s' ) . " ===\n" );
	}
}

/**
 * Get the log file path.
 *
 * @return string The log file path.
 */
public function get_log_file(): string {
	return $this->log_file;
}

/**
 * Log a debug message.
 *
 * @param string               $message The message to log.
 * @param array<string, mixed> $context Optional. Additional context data to include in the log.
 */
public function debug( string $message, array $context = [] ): void {
	$this->log( self::DEBUG, $message, $context );
}

/**
 * Log an info message.
 *
 * @param string               $message The message to log.
 * @param array<string, mixed> $context Optional. Additional context data to include in the log.
 */
public function info( string $message, array $context = [] ): void {
	$this->log( self::INFO, $message, $context );
}

/**
 * Log a warning message.
 *
 * @param string               $message The message to log.
 * @param array<string, mixed> $context Optional. Additional context data to include in the log.
 */
public function warning( string $message, array $context = [] ): void {
	$this->log( self::WARNING, $message, $context );
}

/**
 * Log an error message.
 *
 * @param string               $message The message to log.
 * @param array<string, mixed> $context Optional. Additional context data to include in the log.
 */
public function error( string $message, array $context = [] ): void {
	$this->log( self::ERROR, $message, $context );
}

/**
 * Log a critical message.
 *
 * @param string               $message The message to log.
 * @param array<string, mixed> $context Optional. Additional context data to include in the log.
 */
public function critical( string $message, array $context = [] ): void {
	$this->log( self::CRITICAL, $message, $context );
}

/**
 * Log a message with the specified level.
 *
 * The entry is written as `[timestamp] [level] message`, followed by the
 * JSON-encoded context when one is given. Timestamps are in UTC.
 *
 * @param string               $level   The log level.
 * @param string               $message The message to log.
 * @param array<string, mixed> $context Optional. Additional context data to include in the log.
 */
public function log( string $level, string $message, array $context = [] ): void {
	// Check if this log level should be recorded.
	if ( ! $this->should_log( $level ) ) {
		return;
	}

	$timestamp         = gmdate( 'Y-m-d H:i:s' );
	$formatted_message = "[$timestamp] [$level] $message";

	// Add context if available.
	if ( ! empty( $context ) ) {
		// Context often carries raw process output. Substitute invalid UTF-8 and emit
		// partial output on other encoding errors so the context is not silently lost.
		$encoded_context = json_encode(
			$context,
			JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE
		);

		if ( $encoded_context !== false ) {
			$formatted_message .= ' ' . $encoded_context;
		}
	}

	// Write to log file.
	$this->write_to_log( $formatted_message . PHP_EOL );
}

/**
 * Check if the given log level should be recorded based on the current log level threshold.
 *
 * @param string $level The log level to check.
 * @return bool True if the log level should be recorded, false otherwise.
 */
private function should_log( string $level ): bool {
	$priorities = $this->log_level_priorities;

	// An unknown level, or an unknown configured threshold, cannot be ranked:
	// record the message rather than drop it (and avoid an undefined-index warning).
	if ( ! isset( $priorities[ $level ], $priorities[ $this->log_level ] ) ) {
		return true;
	}

	// Record when the level is at least as severe as the threshold
	// (a larger number means a more severe level).
	return $priorities[ $level ] >= $priorities[ $this->log_level ];
}

/**
 * Write a message to the log file.
 *
 * Write failures never throw; they are reported through PHP's error_log()
 * so operators can still see them.
 *
 * @param string $message The message to write.
 */
private function write_to_log( string $message ): void {
	// Ensure directory exists in case it was deleted while the node is running.
	$dir = dirname( $this->log_file );
	if ( ! is_dir( $dir ) ) {
		// Suppress race-condition warnings: another process may create it between the check and mkdir().
		@mkdir( $dir, 0700, true );
	}

	if ( @file_put_contents( $this->log_file, $message, FILE_APPEND ) === false ) {
		error_log( '[QIT Logger] Failed to write to log file: ' . $this->log_file ); // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log
	}
}
$application->add( $container->make( ListCommand::class ) ); @@ -263,4 +270,8 @@ public function getDefaultCommands() { App::make( EnvironmentDanglingCleanup::class )->cleanup_dangling(); +if ( empty( App::make( Cache::class )->get( 'client_id' ) ) ) { + App::make( Cache::class )->set( 'client_id', \QIT_CLI\generate_uuid4(), - 1 ); +} + return $application; diff --git a/src/src/json-filter.php b/src/src/json-filter.php index 3f4602322..1615a01ac 100644 --- a/src/src/json-filter.php +++ b/src/src/json-filter.php @@ -9,9 +9,9 @@ * Stream filter that only passes valid JSON, collecting non-JSON for error reporting. */ class QIT_JSON_Filter extends \php_user_filter { - private static $non_json_buffer = ''; - private static $has_json_output = false; - private static $initialized = false; + private static string $non_json_buffer = ''; + private static bool $has_json_output = false; + private static bool $initialized = false; public function onCreate(): bool { if ( ! self::$initialized ) { @@ -53,13 +53,13 @@ public static function handle_shutdown(): void { if ( ! empty( trim( self::$non_json_buffer ) ) && ! self::$has_json_output ) { // No JSON output but have non-JSON - this is likely an error echo json_encode( [ - 'error' => 'Command failed with non-JSON output', - 'output' => trim( self::$non_json_buffer ) - ] ) . "\n"; + 'error' => 'Command failed with non-JSON output', + 'output' => trim( self::$non_json_buffer ), + ] ) . "\n"; } // Reset for next use self::$non_json_buffer = ''; self::$has_json_output = false; } -} \ No newline at end of file +}