diff --git a/.gitignore b/.gitignore
index 4d9702cc..1d331c5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,9 @@ RAG/notebooks/langchain/data/save_embedding
 
 # IntelliJ's project specific settings file
 .idea
+
+# Environment variables
+.env
+
+# egg-info directories
+**/*.egg-info
diff --git a/industries/manufacturing/predictive_maintenance_agent/.gitignore b/industries/manufacturing/predictive_maintenance_agent/.gitignore
new file mode 100644
index 00000000..b47a17cb
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/.gitignore
@@ -0,0 +1,46 @@
+# macOS system files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Database and vector store files
+database/
+*.db
+*.sqlite3
+
+# Output and generated files
+output_data/
+moment/
+readmes/
+*.html
+*.csv
+*.npy
+
+# Python package metadata
+src/**/*.egg-info/
+*.egg-info/
+
+# Environment files (if they contain secrets)
+env.sh
+
+# Model files (if large/binary)
+models/*.pkl
+models/*.joblib
+models/*.model
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+*.tmp
+*.temp
+.pytest_cache/
+__pycache__/
+
+# dot env
+mydot.env
diff --git a/industries/manufacturing/predictive_maintenance_agent/README.md b/industries/manufacturing/predictive_maintenance_agent/README.md
new file mode 100644
index 00000000..6b1fbb8f
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/README.md
@@ -0,0 +1,268 @@
+# Predictive Maintenance Agent
+
+A comprehensive AI-powered predictive maintenance system built with NVIDIA AIQ Toolkit for turbofan engine health monitoring and failure prediction.
+
+Work done by: Vineeth Kalluru, Janaki Vamaraju, Sugandha Sharma, Ze Yang, and Viraj Modak
+
+## Overview
+
+Predictive maintenance prevents costly downtime by identifying potential failures before they occur. This agent leverages AI to analyze sensor data from turbofan engines, predict remaining useful life (RUL), and provide actionable insights for maintenance teams.
+
+### Key Benefits
+- **Prevent Costly Downtime**: Identify failures before they occur
+- **Optimize Maintenance**: Perform maintenance only when needed
+- **Extend Equipment Life**: Monitor health to maximize efficiency
+- **Improve Safety**: Prevent catastrophic failures
+- **Reduce Costs**: Minimize emergency repairs and disruptions
+
+## Dataset
+
+Uses the **NASA Turbofan Engine Degradation Simulation Dataset (C-MAPSS)** with:
+- **21 Sensor Measurements**: Temperature, pressure, vibration, and flow
+- **3 Operational Settings**: Different flight conditions
+- **Multiple Engine Units**: Each with unique degradation patterns
+- **Run-to-Failure Data**: Complete lifecycle from healthy operation to failure
+
+## Architecture
+
+Multi-agent architecture with:
+- **React Agent Workflow**: Main orchestration using ReAct pattern
+- **SQL Retriever Tool**: Generates SQL queries using NIM LLM
+- **RUL Prediction Tool**: XGBoost model for remaining useful life prediction
+- **Plotting Agent**: Multi-tool agent for data visualization
+- **Vector Database**: ChromaDB for schema information storage
+
+#### Agentic workflow architecture diagram
+![Agentic workflow](imgs/pred_maint_arch_diagram_img1.png)
+
+#### Agentic workflow architecture diagram w/ reasoning
+![Agentic workflow w/ reasoning](imgs/pred_maint_arch_diagram_img2.png)
+
+## Setup and Installation
+
+### Prerequisites
+- Python 3.11+ (< 3.13)
+- Conda or Miniconda
+- NVIDIA NIM API access
+- Node.js v18+ (for web interface)
+
+### 1. Create Conda Environment
+
+```bash
+conda create -n pdm python=3.11
+conda activate pdm
+```
+
+### 2. Install NVIDIA NeMo Agent Toolkit
+
+1. Clone the NeMo Agent toolkit repository to your local machine.
+   ```bash
+   git clone git@github.com:NVIDIA/NeMo-Agent-Toolkit.git aiqtoolkit
+   cd aiqtoolkit
+   ```
+
+2. Initialize, fetch, and update submodules in the Git repository.
+   ```bash
+   git submodule update --init --recursive
+   ```
+
+3. Fetch the data sets by downloading the LFS files.
+   ```bash
+   git lfs install
+   git lfs fetch
+   git lfs pull
+   ```
+4. Install the NeMo Agent toolkit library.
+   To install the NeMo Agent toolkit library along with all optional dependencies, including developer tools (`--all-groups`) and the dependencies needed for profiling and plugins (`--all-extras`), run the following from the source repository:
+   ```bash
+   uv sync --all-groups --all-extras
+   ```
+
+5. Install the telemetry plugins.
+   ```bash
+   uv pip install -e '.[telemetry]'
+   ```
+
+### 3. Install Predictive Maintenance Agent
+
+```bash
+cd ..
+git clone https://github.com/NVIDIA/GenerativeAIExamples.git
+cd GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent
+uv pip install -e .
+```
+
+### 4. Environment Setup
+Export all required environment variables from the `dot.env` file. Update the file with your API key and secrets before running:
+
+```bash
+source dot.env
+```
+
+### 5. Database Setup
+
+1. Download the [NASA Turbofan Dataset](https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/)
+2. Extract the files to the `data/` directory
+3. Run the setup script:
+```bash
+python setup_database.py
+```
+
+### 6. Configure Paths
+
+Update `configs/config.yml` and `configs/config-reasoning.yml` with your local paths for database, models, and output directories.
+
+### configs/config.yml or configs/config-reasoning.yml
+The `db_path` must point to the database inside your data directory.
+```yaml
+db_path: "${PWD_PATH}/data/nasa_turbo.db" # ← set it to something like this
+```
+Create an empty folder for the output data and point `output_folder` to that path.
+```yaml
+output_folder: "${PWD_PATH}/output_data" # ← set it to something like this
+```
+
+## Launch Server and UI
+
+### Start AIQ Server
+
+The provided config files reference the `PWD_PATH` environment variable, so make sure it is set before starting the AIQ server (sourcing `dot.env` sets it to the current working directory). This ensures the server can locate all required paths correctly.
+
+Then start the server:
+
+```bash
+aiq serve --config_file=configs/config.yml
+```
+or
+```bash
+aiq serve --config_file=configs/config-reasoning.yml
+```
+The server runs on `http://localhost:8000`.
+
+### Spin up code execution sandbox for Reasoning workflow
+
+If you plan to use the reasoning config, you also need to spin up a code execution sandbox server in a separate terminal.
+
+Note: You will need a system that can run Docker. If you are running this on a macOS laptop without Docker Desktop, try [Colima](https://github.com/abiosoft/colima).
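+
+If you go the Colima route, start it before launching the sandbox. A minimal sketch, assuming a default Colima install (the CPU and memory sizes here are illustrative assumptions, not requirements):
+
+```bash
+# Start a Colima VM that provides the Docker runtime (resource sizes are assumptions)
+colima start --cpu 4 --memory 8
+
+# Sanity-check that the Docker CLI can reach the daemon
+docker ps
+```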
+
+Go to the code execution tool folder:
+
+```bash
+cd /path-to/NeMo-Agent-Toolkit/src/aiq/tool/code_execution
+```
+
+Run the server, mounting your workflow's output folder as an internal volume:
+
+```bash
+./local_sandbox/start_local_sandbox.sh local-sandbox \
+/path-to-output-folder-as-specified-in-config-yml/
+```
+
+For example:
+
+```bash
+./local_sandbox/start_local_sandbox.sh local-sandbox \
+/path-to/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data
+```
+
+[Optional] In a new terminal, test your sandbox by running the Python test script:
+
+```bash
+./test_code_execution_sandbox.py
+```
+
+Once the test passes, you can close the testing terminal; you won't need it anymore.
+
+### Setup Web Interface
+
+```bash
+git clone https://github.com/NVIDIA/AIQToolkit-UI.git
+cd AIQToolkit-UI
+npm ci
+npm run dev
+```
+The UI is available at `http://localhost:3000`.
+
+**Configure UI Settings:**
+- Click the Settings icon (bottom left)
+- Set HTTP URL to `/chat/stream` (recommended)
+- Configure the theme and WebSocket URL as needed
+- Check "Enable intermediate results" and "Enable intermediate results by default" if you prefer to see all the agent calls while the workflow runs
+
+## Example Prompts
+
+Test the system with these prompts:
+
+**Data Retrieval:**
+```
+Retrieve the time in cycles and operational setting 1 from the FD001 test table for unit number 1 and plot its value vs time.
+```
+
+![Data Retrieval Example](imgs/test_prompt_1.png)
+
+**Visualization:**
+```
+Retrieve real RUL of each unit in the FD001 test dataset. Then plot a distribution of it.
+```
+
+![Visualization Example](imgs/test_prompt_2.png)
+
+**Prediction:**
+```
+Retrieve time in cycles, all sensor measurements and RUL value for engine unit 24 from FD001 test and RUL tables. Predict RUL for it. Finally, generate a plot to compare actual RUL value with predicted RUL value across time.
+```
+![Prediction Example](imgs/test_prompt_3.png)
+
+**Anomaly Detection:**
+1) Retrieve and detect anomalies in sensor 4 measurements for engine number 78.
+2) Retrieve and detect anomalies in sensor 4 for unit 17.
+
+## Observability (Optional)
+
+### Monitor your system with Phoenix:
+
+```bash
+# Docker (recommended)
+docker run -p 6006:6006 -p 4317:4317 arizephoenix/phoenix:latest
+
+# Or install as a package
+uv pip install arize-phoenix
+phoenix serve
+```
+Access the dashboard at `http://localhost:6006` to monitor traces, performance, and costs.
+
+## Evaluation
+
+### Evaluate with AIQ
+
+Use this command to run the evaluations:
+```bash
+aiq eval --config_file configs/config-reasoning.yml
+```
+### Evaluate with Catalyst:
+
+Follow the instructions [here](https://github.com/NVIDIA/NeMo-Agent-Toolkit/blob/develop/docs/source/workflows/observe/observe-workflow-with-catalyst.md) to set up a RagaAI Catalyst profile and secrets.
+
+[TBD]
+
+## Next Steps
+
+The agent provides a foundation for industrial AI applications. Planned enhancements include:
+- Memory layer for context retention
+- Parallel tool execution for faster responses
+- Action recommendation agent
+- Real-time fault detection agent
+- Integration with NVIDIA's NV-Tesseract foundation models for improved accuracy
+- Integration with NeMo Retriever for data source context
+- Expansion of the eval dataset with complex queries that involve advanced SQL constructs such as CTEs
+--- + +**Resources:** +- [NVIDIA AIQ Toolkit Documentation](https://docs.nvidia.com/aiq-toolkit/) +- [Phoenix Observability](https://phoenix.arize.com/) +- [NV-Tesseract Models](https://developer.nvidia.com/blog/new-nvidia-nv-tesseract-time-series-models-advance-dataset-processing-and-anomaly-detection/) diff --git a/industries/manufacturing/predictive_maintenance_agent/configs/config-reasoning.yml b/industries/manufacturing/predictive_maintenance_agent/configs/config-reasoning.yml new file mode 100644 index 00000000..136effb3 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/configs/config-reasoning.yml @@ -0,0 +1,316 @@ +general: + use_uvloop: true + telemetry: + logging: + console: + _type: console + level: INFO + file: + _type: file + path: "${PWD_PATH}/pdm.log" + level: DEBUG + # Uncomment this to enable tracing + # tracing: + # catalyst: + # _type: catalyst + # project: "pdm-test" + # dataset: "pdm-dataset" + # phoenix: + # _type: phoenix + # endpoint: http://localhost:6006/v1/traces + # project: pdm-test + +llms: + sql_llm: + _type: nim + model_name: "qwen/qwen2.5-coder-32b-instruct" + analyst_llm: + _type: nim + model_name: "qwen/qwen2.5-coder-32b-instruct" + coding_llm: + _type: nim + model_name: "qwen/qwen2.5-coder-32b-instruct" + max_tokens: 4000 + reasoning_llm: + _type: nim + model_name: "nvidia/llama-3.3-nemotron-super-49b-v1" + multimodal_judging_llm: + _type: nim + model_name: meta/llama-3.2-11b-vision-instruct + +embedders: + vanna_embedder: + _type: nim + model_name: "nvidia/nv-embed-v1" + +functions: + sql_retriever: + _type: generate_sql_query_and_retrieve_tool + llm_name: sql_llm + embedding_name: vanna_embedder + vector_store_path: "${PWD_PATH}/database" + db_path: "${PWD_PATH}/database/nasa_turbo.db" + output_folder: "${PWD_PATH}/output_data" + vanna_training_data_path: "${PWD_PATH}/vanna_training_data.yaml" + predict_rul: + _type: predict_rul_tool + output_folder: "${PWD_PATH}/output_data" + scaler_path: "${PWD_PATH}/models/scaler_model.pkl" + model_path: "${PWD_PATH}/models/xgb_model_fd001.pkl" + plot_distribution: + _type: plot_distribution_tool + output_folder: "${PWD_PATH}/output_data" + plot_line_chart: + _type: plot_line_chart_tool + output_folder: "${PWD_PATH}/output_data" + plot_comparison: + _type: plot_comparison_tool + output_folder: "${PWD_PATH}/output_data" + anomaly_detection: + _type: moment_anomaly_detection_tool + output_folder: "${PWD_PATH}/output_data" + plot_anomaly: + _type: plot_anomaly_tool + output_folder: "${PWD_PATH}/output_data" + code_generation_assistant: + _type: code_generation_assistant + llm_name: coding_llm + code_execution_tool: code_execution + output_folder: "${PWD_PATH}/output_data" + verbose: true + code_execution: + _type: code_execution + uri: http://127.0.0.1:6000/execute + sandbox_type: local + max_output_characters: 2000 + data_analysis_assistant: + _type: react_agent + llm_name: analyst_llm + max_iterations: 20 + max_retries: 3 + tool_names: [sql_retriever, code_generation_assistant, predict_rul, plot_distribution, plot_line_chart, plot_comparison, anomaly_detection, plot_anomaly] + system_prompt: | + ### TASK DESCRIPTION #### + You are a helpful data analysis assistant that can help with predictive maintenance tasks for a turbofan engine. 
+      **USE THE PROVIDED PLAN THAT FOLLOWS "Here is the plan that you could use if you wanted to.."**
+
+      ### TOOLS ###
+      You can use the following tools to help with your task:
+      {tools}
+
+      ### RESPONSE FORMAT ###
+      **STRICTLY RESPOND IN ONE OF THE FOLLOWING FORMATS**:
+
+      **FORMAT 1 (to share your thoughts)**
+      Input plan: Summarize all the steps in the plan.
+      Executing step: the step you are currently executing from the plan
+      Thought: you should always think about what to do
+
+      **FORMAT 2 (to return the final answer)**
+      Input plan: Summarize all the steps in the plan.
+      Executing step: highlight the step you are currently executing from the plan
+      Thought: you should always think about what to do
+      Final Answer: the final answer to the original input question, including a short summary of what the plot is about
+
+      **FORMAT 3 (when using a tool)**
+      Input plan: Summarize all the steps in the plan.
+      Executing step: the step you are currently executing from the plan
+      Thought: you should always think about what to do
+      Action: the action to take, should be one of [{tool_names}]
+      Action Input: the input to the tool (if there is no required input, include "Action Input: None")
+      Observation: wait for the tool to finish execution and return the result
+
+      ### HOW TO CHOOSE THE RIGHT TOOL ###
+      Follow these guidelines while deciding the right tool to use:
+
+      1. **SQL Retrieval Tool**
+      - Use this tool to retrieve data from the database.
+      - NEVER generate SQL queries by yourself, instead pass the top-level instruction to the tool.
+
+      2. **Prediction Tools**
+      - Use predict_rul for RUL prediction requests.
+      - Always call the data retrieval tool to get sensor data before predicting RUL.
+
+      3. **Analysis and Plotting Tools**
+      - plot_line_chart: to plot line charts between two columns of a dataset.
+      - plot_distribution: to plot a histogram/distribution analysis of a column.
+      - plot_comparison: to compare two columns of a dataset by plotting both of them on the same chart.
+
+      4. **Anomaly Detection Tools**
+      - Use anomaly_detection for state-of-the-art foundation model-based anomaly detection using MOMENT-1-Large.
+      - **REQUIRES JSON DATA**: First use sql_retriever to get sensor data, then pass the JSON file path to anomaly_detection.
+      - **OUTPUT**: Creates enhanced sensor data with an added 'is_anomaly' boolean column.
+      - Use plot_anomaly to create interactive visualizations of anomaly detection results.
+      - **WORKFLOW**: sql_retriever → anomaly_detection → plot_anomaly for complete anomaly analysis with visualization.
+
+      5. **Code Generation Guidelines**
+      When using code_generation_assistant, provide comprehensive instructions in a single parameter:
+      • Include complete task description with user context and requirements
+      • Specify available data files and their structure (columns, format, location)
+      • Combine multiple related tasks into bullet points within one instruction
+      • Mention specific output requirements (HTML files, JSON data, visualizations)
+      • Include file path details and any constraints or preferences
+      • Add example: "Load 'data.json' with columns A,B,C. Create time series plot. Save as HTML."
+      • The tool automatically generates and executes Python code, returning results and file paths.
+
+      ### TYPICAL WORKFLOW FOR EXECUTING A PLAN ###
+      Generate all outputs to this path: "${PWD_PATH}/output_data"
+      While generating Python code, use "./output_data/filename" to access files in output_data.
+      When passing files to other tools, use the absolute path: "${PWD_PATH}/output_data/filename".
+
+      First, Data Extraction
+      - Use the SQL retrieval tool to fetch the required data
+      Next, Data Processing and Visualization
+      - Use the existing plotting tools to generate plots
+      - **For Anomaly Detection**: Follow the modular workflow: sql_retriever → anomaly_detection → plot_anomaly
+      - If the existing tools are not enough, use code_generation_assistant, which will generate and execute custom Python code automatically
+      Finally, return the result to the user
+      - Return processed information to the calling agent
+      - The user will interact with you through a web frontend, so you should return HTML files if generated by the code execution tool.
+      - DO NOT USE MARKDOWN FORMATTING IN YOUR RESPONSE.
+      - If the code execution tool responds with a warning in the stderr, ignore it and take action based on the stdout.
+
+workflow:
+  _type: reasoning_agent
+  augmented_fn: data_analysis_assistant
+  llm_name: reasoning_llm
+  verbose: true
+  reasoning_prompt_template: |
+    ### DESCRIPTION ###
+    You are a Data Analysis Reasoning and Planning Expert specialized in analyzing turbofan engine sensor data and predictive maintenance tasks.
+    You are tasked with creating detailed execution plans for addressing user queries while being conversational and helpful.
+
+    **Your Role and Capabilities:**
+    - Expert in turbofan engine data analysis, predictive maintenance, and anomaly detection
+    - Provide conversational responses while maintaining technical accuracy
+    - Create step-by-step execution plans using available tools, which will be invoked by a data analysis assistant
+
+    **You are given a data analysis assistant to execute your plan; all you have to do is generate the plan**
+    DO NOT USE MARKDOWN FORMATTING IN YOUR RESPONSE.
+
+    ### ASSISTANT DESCRIPTION ###
+    {augmented_function_desc}
+
+    ### TOOLS AVAILABLE TO THE ASSISTANT ###
+    {tools}
+
+    ### CONTEXT ###
+    You work with turbofan engine sensor data from multiple engines in a fleet. The data contains:
+    - **Time series data** from different engines, each with unique wear patterns and operational history, separated into
+      four datasets (FD001, FD002, FD003, FD004); each dataset is further divided into training and test subsets.
+    - **26 data columns**: unit number, time in cycles, 3 operational settings, and 21 sensor measurements
+    - **Engine lifecycle**: Engines start operating normally, then develop faults that grow until system failure
+    - **Predictive maintenance goal**: Predict Remaining Useful Life (RUL) - how many operational cycles before failure
+    - **Data characteristics**: Contains normal operational variation, sensor noise, and progressive fault development
+    This context helps you understand user queries about engine health, sensor patterns, failure prediction, and maintenance planning.
+    REMEMBER TO RELY ON THE DATA ANALYSIS ASSISTANT TO RETRIEVE DATA FROM THE DATABASE.
+
+    ### SPECIAL TASKS ###
+    Create execution plans for specialized predictive maintenance tasks. For other queries, use standard reasoning.
+
+    ### SPECIAL TASK 0: RUL Comparison (Actual vs Predicted) ###
+    1) Retrieve ground truth RUL data for the specified engine from the database
+    2) Predict RUL for the same engine using the model
+    3) Transform the actual RUL to a piecewise representation (MAXLIFE=125) using Python
+    4) Apply the knee_RUL function to the actual RUL column using the apply_piecewise_rul_to_data function: calculate the true failure point as max_cycle_in_data + final_rul, replace the 'actual_RUL' column.
+    5) Generate a comparison visualization showing the clean piecewise pattern alongside predictions using the provided plot comparison tool
+
+    ### GUIDELINES ###
+    **Generate and return the absolute path to any files generated by the tools.**
+    **DO NOT use the predict_rul tool to fetch RUL data unless the user explicitly uses the word "predict" or something similar; this is because there is also ground truth RUL data in the database, which the user might request sometimes.**
+    **REMEMBER: The SQL retrieval tool is smart enough to understand queries like counts, totals, basic facts, etc. It can use UNIQUE(), COUNT(), SUM(), AVG(), MIN(), MAX() to answer simple queries. NO NEED TO USE THE CODE GENERATION ASSISTANT FOR SIMPLE QUERIES.**
+    **THE CODE GENERATION ASSISTANT IS COSTLY AND UNRELIABLE MOST OF THE TIME, SO PLEASE USE IT ONLY FOR COMPLEX QUERIES THAT REQUIRE DATA PROCESSING AND VISUALIZATION.**
+
+    **User Input:**
+    {input_text}
+
+    Analyze the input and create an appropriate execution plan in bullet points.
+
+eval:
+  general:
+    output:
+      dir: "${PWD_PATH}/eval_output"
+      cleanup: true
+    dataset:
+      _type: json
+      file_path: "${PWD_PATH}/eval_data/eval_set_master.json"
+    query_delay: 10 # seconds between queries
+    max_concurrent: 1 # process queries sequentially
+  evaluators:
+    # final_answer_eval:
+    #   _type: llm_judge
+    #   llm_name: judging_llm
+    #   judge_prompt: |
+    #     You are an expert evaluator for agentic workflow systems. Your task is to evaluate how well a generated answer matches the reference answer for a given question.
+
+    #     Question: {question}
+
+    #     Reference Answer: {reference_answer}
+
+    #     Generated Answer: {generated_answer}
+
+    #     Please evaluate the generated answer against the reference answer considering:
+    #     1. Factual accuracy and correctness of technical information
+    #     2. Completeness of the response (does it answer all parts of the question?)
+    #     3. Technical accuracy for predictive maintenance context (RUL predictions, sensor data analysis, etc.)
+    #     4. Relevance to the question asked
+    #     5. Quality of data analysis and insights provided
+    #     6. Appropriate use of predictive maintenance terminology and concepts
+
+    #     Provide your evaluation as a JSON object with the following format:
+    #     {{
+    #       "score": <score between 0.0 and 1.0>,
+    #       "reasoning": "<brief explanation of the score>"
+    #     }}
+
+    #     The score should be:
+    #     - 1.0: Perfect match, completely accurate and complete response
+    #     - 0.5: Fair, partially correct but with significant issues or missing information
+    #     - 0.0: Poor, mostly incorrect but some relevant information
+    multimodal_eval:
+      _type: multimodal_llm_judge_evaluator
+      llm_name: multimodal_judging_llm
+      judge_prompt: |
+        You are an expert evaluator for predictive maintenance agentic workflows. Your task is to evaluate how well a generated response (which may include both text and visualizations) matches the reference answer for a given question.
+
+        Question: {question}
+
+        Reference Answer: {reference_answer}
+
+        Generated Response: {generated_answer}
+
+        IMPORTANT: You MUST provide your response ONLY as a valid JSON object. Do not include any text before or after the JSON.
+
+        EVALUATION LOGIC:
+        IMPORTANT: Your evaluation mode is determined by whether actual plot images are attached to this message:
+        - If PLOT IMAGES are attached to this message: Perform ONLY PLOT EVALUATION by examining the actual plot images
+        - If NO IMAGES are attached: Perform ONLY TEXT EVALUATION of the text response
+
+        DO NOT confuse text mentions of plots/files with actual attached images.
Only evaluate plots if you can actually see plot images in this message. + + TEXT EVALUATION (only when no images are attached): + Check if the generated text answer semantically matches the reference answer (not word-for-word, but meaning and content). Score: + - 1.0: Generated answer fully matches the reference answer semantically + - 0.5: Generated answer partially matches the reference answer with some missing or incorrect elements + - 0.0: Generated answer does not match the reference answer semantically + + PLOT EVALUATION (only when images are attached): + Use the reference answer as the expected plot description and check how well the actual generated plot matches it. Score: + - 1.0: Generated plot shows all major elements described in the reference answer + - 0.5: Generated plot shows some elements described in the reference answer but missing significant aspects + - 0.0: Generated plot does not match the reference answer description + + FINAL SCORING: + Your final score should be based on whichever evaluation type was performed (TEXT or PLOT, not both). + + You MUST respond with ONLY this JSON format: + {{ + "score": 0.0, + "reasoning": "EVALUATION TYPE: [TEXT or PLOT] - [your analysis and score with justification]" + }} + + CRITICAL REMINDER: + - If images are attached → Use "EVALUATION TYPE: PLOT" + - If no images → Use "EVALUATION TYPE: TEXT" + + Replace the score with your actual evaluation (0.0, 0.5, or 1.0). diff --git a/industries/manufacturing/predictive_maintenance_agent/dot.env b/industries/manufacturing/predictive_maintenance_agent/dot.env new file mode 100644 index 00000000..0936eb3a --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/dot.env @@ -0,0 +1,9 @@ +# This .env file contains environment variables for the Predictive Maintenance Agent. +# - PWD_PATH: Sets the current working directory path for config file references. +# - NVIDIA_API_KEY: API key for accessing NVIDIA NIM models. +# - CATALYST_ACCESS_KEY, CATALYST_SECRET_KEY, CATALYST_ENDPOINT: Credentials and endpoint for Catalyst observability integration. 
+PWD_PATH="$(pwd)" +NVIDIA_API_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +CATALYST_ACCESS_KEY="xxxxxxxxxxxxxxxx" +CATALYST_SECRET_KEY="xxxxxxxxxxxxxxxxxxxxxxxx" +CATALYST_ENDPOINT=https://catalyst.raga.ai/api \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/eval_data/eval_set_master.json b/industries/manufacturing/predictive_maintenance_agent/eval_data/eval_set_master.json new file mode 100644 index 00000000..0d9d9911 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/eval_data/eval_set_master.json @@ -0,0 +1,252 @@ +[ + { + "id": "1", + "question": "What is the ground truth remaining useful life (RUL) of unit_number 59 in dataset FD001", + "answer": "114 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "1", + "source": "eval_set" + }, + { + "id": "2", + "question": "What is the ground truth RUL of unit_number 20 in dataset FD001", + "answer": "16 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "2", + "source": "eval_set" + }, + { + "id": "3", + "question": "How many units have ground truth RUL of 100 or more in dataset FD003", + "answer": "33 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "3", + "source": "eval_set" + }, + { + "id": "4", + "question": "How many units have ground truth RUL of 50 or less in dataset FD002", + "answer": "88 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "4", + "source": "eval_set" + }, + { + "id": "5", + "question": "Report the unit_number of the units that have ground truth RUL equal to 155 in FD002", + "answer": "6, 141, 165 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "5", + "source": "eval_set" + }, + { + "id": "6", + "question": "In the dataset FD004, how many units have ground truth RUL equal to 10 and what are their unit numbers?", + "answer": "4 units; unit numbers: 40, 82, 174, 184", + "type": "text", + "category": "retrieval", + "subcategory": "medium", + "original_id": "6", + "source": "eval_set" + }, + { + "id": "7", + "question": "In dataset train_FD004, what was the operational_setting_3 at time_in_cycles 20 for unit_number 107", + "answer": "100 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "7", + "source": "eval_set" + }, + { + "id": "8", + "question": "In dataset train_FD004, what was the 3rd operational setting at time 20 for unit_number 107", + "answer": "100 ", + "type": "text", + "category": "retrieval", + "subcategory": "medium", + "original_id": "8", + "source": "eval_set" + }, + { + "id": "9", + "question": "In dataset test_FD002, what are the values of the three operational setting for unit_number 56 at time_in_cycles 10", + "answer": "10.0026, 0.25, 100 ", + "type": "text", + "category": "retrieval", + "subcategory": "medium", + "original_id": "9", + "source": "eval_set" + }, + { + "id": "10", + "question": "In dataset test_FD003, what is the value of sensor_measurement_4 for unit_number 25 at time_in_cycles 20", + "answer": "1409.26 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "10", + "source": "eval_set" + }, + { + "id": "11", + "question": "How many units have operational_setting_3 equal to 100 in dataset train_FD001 at time_in_cycles 40?", + "answer": "100 ", + "type": "text", + "category": "retrieval", + "subcategory": "medium", + 
"original_id": "11", + "source": "eval_set" + }, + { + "id": "12", + "question": "How many units have operational_setting_3 equal to 100 in dataset train_FD001?", + "answer": "100 ", + "type": "text", + "category": "retrieval", + "subcategory": "medium", + "original_id": "12", + "source": "eval_set" + }, + { + "id": "13", + "question": "In dataset train_FD003, what was sensor_measurement_20 and sensor_measurement_21 for unit 1 at time_in_cycles 10", + "answer": "38.94, 23.4781 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "13", + "source": "eval_set" + }, + { + "id": "14", + "question": "For dataset test_FD004, what is the ground truth remaining useful life of unit 60", + "answer": "139 ", + "type": "text", + "category": "retrieval", + "subcategory": "easy", + "original_id": "14", + "source": "eval_set" + }, + { + "id": "15", + "question": "Using the data in test_FD002, predict the remaining useful life of unit_number 10 at time_in_cycles 84", + "answer": "79 ", + "type": "text", + "category": "prediction", + "subcategory": "medium", + "original_id": "15", + "source": "eval_set" + }, + { + "id": "16", + "question": "Given the data in test_FD003, predict the RUL of unit_number 30", + "answer": "89 ", + "type": "text", + "category": "prediction", + "subcategory": "medium", + "original_id": "16", + "source": "eval_set" + }, + { + "id": "17", + "question": "In dataset train_FD004, plot sensor_measurement1 vs time_in_cycles for unit_number 107", + "answer": "Line chart showing sensor_measurement1 values on y-axis ranging from 445.00 to 518.67 plotted against time_in_cycles in x-axisfor unit 107 in dataset FD004.", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "easy", + "original_id": "1", + "source": "eval_set" + }, + { + "id": "18", + "question": "In dataset train_FD004, plot the variation of sensor_measurement1 over time for unit_number 107", + "answer": "Line chart displaying the variation of sensor_measurement1 values on y-axis over time cycles in x-axis for unit 107 in dataset FD004. The plot should illustrate how sensor_measurement1 changes across different time cycles, demonstrating the temporal variation pattern of this sensor reading.", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "easy", + "original_id": "2", + "source": "eval_set" + }, + { + "id": "19", + "question": "In dataset train_FD002, plot operational_setting_3 vs time_in_cycles for unit_number 200", + "answer": "Line chart showing operational_setting_3 values on y-axis ranging against time_in_cycles in x-axis for unit 200 in dataset FD002. 
Only two values 100 and 60 should be visible on the plot.", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "easy", + "original_id": "3", + "source": "eval_set" + }, + { + "id": "20", + "question": "Plot a histogram showing distribution of values of operational_setting_3 over time for unit_number 200 in dataset train_FD002", + "answer": "Histogram Two bars for 100 and 60 with higher bar for 100", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "medium", + "original_id": "4", + "source": "eval_set" + }, + { + "id": "21", + "question": "In dataset test_FD001 plot a histogram showing the distribution of operational_setting_3 across all units", + "answer": "Constant value 100, so just one high bar for 100", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "medium", + "original_id": "5", + "source": "eval_set" + }, + { + "id": "22", + "question": "In dataset test_FD001 plot operational_setting_3 as a function of time_in_cycles for units 10, 20, 30, 40", + "answer": "Four constant lines at 100", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "medium", + "original_id": "6", + "source": "eval_set" + }, + { + "id": "23", + "question": "Retrieve RUL of all units from the FD001 and plot their distribution using a histogram", + "answer": "Histogram showing distribution of RUL values for all units in FD001 dataset. Should contain 100 data points representing different RUL values ranging from 7 to 145 cycles. The distribution should show 71 unique RUL values with varying frequencies. The plot should display the spread and frequency of remaining useful life values across all engine units in the dataset.", + "type": "text_plus_plot", + "category": "visualization", + "subcategory": "medium", + "original_id": "7", + "source": "eval_set" + }, + { + "id": "24", + "question": "Retrieve time in cycles, all sensor measurements and RUL value for engine unit 24 from FD001 test and RUL tables. Predict RUL for it. 
Finally, generate a plot to compare actual RUL value with predicted RUL value across time.",
+    "answer": "A plot showing both actual RUL values and predicted RUL values trend (in y-axis) plotted against time in cycles (in x-axis) for engine unit 24",
+    "type": "text_plus_plot",
+    "category": "visualization",
+    "subcategory": "hard",
+    "original_id": "8",
+    "source": "eval_set"
+  },
+  {
+    "id": "25",
+    "question": "Retrieve and detect anomalies in sensor 4 measurements for engine number 78.",
+    "answer": "A plot showing observed values and anomalies in sensor 4 measurements for engine number 78",
+    "type": "text_plus_plot",
+    "category": "visualization",
+    "subcategory": "hard",
+    "original_id": "9",
+    "source": "eval_set"
+  }
+]
\ No newline at end of file
diff --git a/industries/manufacturing/predictive_maintenance_agent/eval_data/eval_set_test.json b/industries/manufacturing/predictive_maintenance_agent/eval_data/eval_set_test.json
new file mode 100644
index 00000000..55d5f000
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/eval_data/eval_set_test.json
@@ -0,0 +1,32 @@
+[
+  {
+    "id": "1",
+    "question": "What is the ground truth remaining useful life (RUL) of unit_number 59 in dataset test FD001",
+    "answer": "114 "
+  },
+  {
+    "id": "2",
+    "question": "What is the ground truth RUL of unit_number 20 in dataset test FD001",
+    "answer": "16 "
+  },
+  {
+    "id": "3",
+    "question": "In dataset train_FD004, plot sensor_measurement1 vs time_in_cycles for unit_number 107",
+    "answer": "Line chart showing sensor_measurement1 values on y-axis ranging from 445.00 to 518.67 plotted against time_in_cycles in x-axis for unit 107 in dataset FD004.",
+    "type": "text_plus_plot",
+    "category": "visualization",
+    "subcategory": "easy",
+    "original_id": "1",
+    "source": "eval_set"
+  },
+  {
+    "id": "4",
+    "question": "Retrieve and detect anomalies in sensor 4 measurements for engine number 78.",
+    "answer": "A plot showing observed values and anomalies in sensor 4 measurements for engine number 78",
+    "type": "text_plus_plot",
+    "category": "visualization",
+    "subcategory": "hard",
+    "original_id": "9",
+    "source": "eval_set"
+  }
+]
\ No newline at end of file
diff --git a/industries/manufacturing/predictive_maintenance_agent/eval_output/example_multimodal_eval_output.json b/industries/manufacturing/predictive_maintenance_agent/eval_output/example_multimodal_eval_output.json
new file mode 100644
index 00000000..af5bb9cf
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/eval_output/example_multimodal_eval_output.json
@@ -0,0 +1,348 @@
+{
+  "average_score": 0.84,
+  "eval_output_items": [
+    {
+      "id": 1,
+      "score": 1.0,
+      "reasoning": {
+        "question": "What is the ground truth remaining useful life (RUL) of unit_number 59 in dataset FD001",
+        "reference_answer": "114 ",
+        "generated_answer": "The ground truth Remaining Useful Life (RUL) for unit_number 59 in dataset FD001 is 114 cycles.",
+        "llm_judgment": "EVALUATION TYPE: TEXT - The generated response fully matches the reference answer semantically, providing the exact ground truth Remaining Useful Life (RUL) for unit_number 59 in dataset FD001.",
+        "plot_paths": [],
+        "num_images_analyzed": 0,
+        "evaluation_type": "TEXT"
+      }
+    },
+    {
+      "id": 2,
+      "score": 1.0,
+      "reasoning": {
+        "question": "What is the ground truth RUL of unit_number 20 in dataset FD001",
+        "reference_answer": "16 ",
+        "generated_answer": "The ground truth RUL for unit_number 20 in dataset FD001 is 16.",
+        "llm_judgment": "EVALUATION TYPE: TEXT -
The generated response semantically matches the reference answer, providing the exact ground truth RUL for unit_number 20 in dataset FD001.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 3, + "score": 1.0, + "reasoning": { + "question": "How many units have ground truth RUL of 100 or more in dataset FD003", + "reference_answer": "33 ", + "generated_answer": "There are 33 units in dataset FD003 with a ground truth RUL of 100 or more. The data has been saved to /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/count_units_in_fd003_where_gro_results.json for reference.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response semantically matches the reference answer. It accurately states the number of units with a ground truth RUL of 100 or more in dataset FD003 and provides a file path for reference, matching the reference answer's content and meaning.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 4, + "score": 1.0, + "reasoning": { + "question": "How many units have ground truth RUL of 50 or less in dataset FD002", + "reference_answer": "88 ", + "generated_answer": "There are 88 units in FD002 with a ground truth RUL of 50 or less. The result has been saved to /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/count_units_in_fd002_where_gro_results.json.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response semantically matches the reference answer. The generated text accurately states the number of units with a ground truth RUL of 50 or less in dataset FD002, and also mentions the location of the saved result, which is not present in the reference answer but does not affect the semantic match.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 5, + "score": 1.0, + "reasoning": { + "question": "Report the unit_number of the units that have ground truth RUL equal to 155 in FD002", + "reference_answer": "6, 141, 165 ", + "generated_answer": "The unit numbers from the FD002 dataset that have a ground truth RUL equal to 155 are 6, 141, and 165.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response semantically matches the reference answer. The unit numbers mentioned in the generated response (6, 141, and 165) are identical to those in the reference answer, indicating a perfect match.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 6, + "score": 0.0, + "reasoning": { + "question": "In the dataset FD004, how many units have ground truth RUL equal to 10 and what are their unit numbers?", + "reference_answer": "4 units; unit numbers: 40, 82, 174, 184", + "generated_answer": "In the dataset FD004, there are 4 units with a ground truth RUL equal to 10. The unit numbers are 40, 82, 174, and another unit which is not shown in the sample data but is included in the full result set. The full list of unit numbers is saved in the file `/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/in_fd004_count_units_where_gro_results.json`.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response partially matches the reference answer semantically. 
Although it correctly states the number of units with ground truth RUL equal to 10, it incorrectly mentions that the unit numbers are not shown in the sample data but are included in the full result set. The reference answer explicitly lists the unit numbers, which are not present in the generated response.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 7, + "score": 1.0, + "reasoning": { + "question": "In dataset train_FD004, what was the operational_setting_3 at time_in_cycles 20 for unit_number 107", + "reference_answer": "100 ", + "generated_answer": "The operational_setting_3 at time_in_cycles 20 for unit_number 107 in the dataset train_FD004 is **100.0**.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response fully matches the reference answer semantically, providing the exact value of 100.0 for the operational_setting_3 at time_in_cycles 20 for unit_number 107 in the dataset train_FD004.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 8, + "score": 1.0, + "reasoning": { + "question": "In dataset train_FD004, what was the 3rd operational setting at time 20 for unit_number 107", + "reference_answer": "100 ", + "generated_answer": "The 3rd operational setting (operational_setting_3) for unit number 107 at time in cycles 20 is 100.0.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response fully matches the reference answer semantically, providing the exact value of 100 for the 3rd operational setting at time 20 for unit number 107.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 9, + "score": 1.0, + "reasoning": { + "question": "In dataset test_FD002, what are the values of the three operational setting for unit_number 56 at time_in_cycles 10", + "reference_answer": "10.0026, 0.25, 100 ", + "generated_answer": "For unit_number 56 at time_in_cycles 10 in test_FD002, the operational settings are: operational_setting_1 = 10.0026, operational_setting_2 = 0.25, and operational_setting_3 = 100.0.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated response fully matches the reference answer semantically, providing the exact values for the three operational settings for unit_number 56 at time_in_cycles 10 in test_FD002.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 10, + "score": 1.0, + "reasoning": { + "question": "In dataset test_FD003, what is the value of sensor_measurement_4 for unit_number 25 at time_in_cycles 20", + "reference_answer": "1409.26 ", + "generated_answer": "The value of `sensor_measurement_4` for `unit_number` 25 at `time_in_cycles` 20 in `test_FD003` is **1409.26**. The SQL query executed was: `SELECT sensor_measurement_4 FROM test_FD003 WHERE unit_number = 25 AND time_in_cycles = 20`. This query directly provided the sensor measurement you were looking for. The result is contained in the file: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/what_is_the_value_of_sensor_me_results.json which has 1 row with the column `sensor_measurement_4`.", + "llm_judgment": "{\n \"score\": 1.0,\n \"reasoning\": \"EVALUATION TYPE: TEXT - The generated response fully matches the reference answer semantically. It provides the exact value of sensor_measurement_4 for unit_number 25 at time_in_cycles 20 in test_FD003, which is 1409.26. 
The response also includes a detailed description of the SQL query executed and the location of the result in the output file, which further supports the accuracy of the answer.\"\n}",
+        "plot_paths": [],
+        "num_images_analyzed": 0,
+        "evaluation_type": "TEXT"
+      }
+    },
+    {
+      "id": 11,
+      "score": 1.0,
+      "reasoning": {
+        "question": "How many units have operational_setting_3 equal to 100 in dataset train_FD001 at time_in_cycles 40?",
+        "reference_answer": "100 ",
+        "generated_answer": "There are 100 units in the `train_FD001` dataset where `operational_setting_3` is equal to 100 and `time_in_cycles` is equal to 40.",
+        "llm_judgment": "EVALUATION TYPE: TEXT - The generated text answer fully matches the reference answer semantically, providing the exact number of units with the specified conditions.",
+        "plot_paths": [],
+        "num_images_analyzed": 0,
+        "evaluation_type": "TEXT"
+      }
+    },
+    {
+      "id": 12,
+      "score": 1.0,
+      "reasoning": {
+        "question": "How many units have operational_setting_3 equal to 100 in dataset train_FD001?",
+        "reference_answer": "100 ",
+        "generated_answer": "There are 100 distinct units where
`operational_setting_3` equals 100.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated text answer fully matches the reference answer semantically, providing the exact number of units where operational_setting_3 equals 100.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 13, + "score": 1.0, + "reasoning": { + "question": "In dataset train_FD003, what was sensor_measurement_20 and sensor_measurement_21 for unit 1 at time_in_cycles 10", + "reference_answer": "38.94, 23.4781 ", + "generated_answer": "The sensor measurements for unit 1 at the 10th cycle from the `train_FD003` table are:\n- **sensor_measurement_20**: 38.94\n- **sensor_measurement_21**: 23.4781", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated answer provides a clear and concise description of the sensor measurements, matching the reference answer semantically with no missing or incorrect elements, resulting in a perfect score.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 14, + "score": 1.0, + "reasoning": { + "question": "For dataset test_FD004, what is the ground truth remaining useful life of unit 60", + "reference_answer": "139 ", + "generated_answer": "The ground truth Remaining Useful Life (RUL) for unit 60 from the `test_FD004` dataset is **139**.", + "llm_judgment": "EVALUATION TYPE: TEXT - The generated answer is a direct quote of the reference answer, indicating a perfect match semantically.", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 15, + "score": 0.0, + "reasoning": { + "question": "Using the data in test_FD002, predict the remaining useful life of unit_number 10 at time_in_cycles 84", + "reference_answer": "79 ", + "generated_answer": "I seem to be having a problem.", + "llm_judgment": "{\n \"score\": 0.0,\n \"reasoning\": \"EVALUATION TYPE: TEXT - The generated answer does not match the reference answer semantically, it does not provide any information about the remaining useful 
life<|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_
special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_2
46|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|><|reserved_special_token_246|>", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "TEXT" + } + }, + { + "id": 16, + "score": 0.0, + "reasoning": { + "question": "Given the data in test_FD003, predict the RUL of unit_number 30", + "reference_answer": "89 ", + "generated_answer": "The RUL predictions for unit_number 30 from test_FD003 have been generated and visualized. The average predicted RUL is 108.74 cycles, with a minimum of 79.36 cycles and a maximum of 123.95 cycles. The standard deviation of the predictions is 9.49 cycles. The line chart showing the predicted RUL over time is available in the following formats:\n- Interactive HTML file: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_predicted_RUL.html\n- Static PNG file: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_predicted_RUL.png\n\nThe plot displays the predicted RUL for unit 30 over the number of cycles, providing a clear visualization of the remaining useful life predictions.", + "llm_judgment": "**EVALUATION TYPE: PLOT**\n\nThe generated plot shows the predicted RUL for unit 30 over time, with a clear visualization of the remaining useful life predictions. The plot displays the predicted RUL for unit 30 over the number of cycles, providing a clear visualization of the remaining useful life predictions.\n\nUpon examining the plot, it appears that the predicted RUL values are generally higher than the reference answer of 89 cycles. The plot shows a range of predicted RUL values, with some values exceeding 100 cycles. However, the plot does not provide a clear indication of the exact RUL value for unit 30.\n\n**Score: 0.0**\n\nThe generated plot does not match the reference answer description, as it does not provide a clear indication of the exact RUL value for unit 30. The plot shows a range of predicted RUL values, but it does not provide a specific value that matches the reference answer of 89 cycles.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_predicted_RUL.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 17, + "score": 1.0, + "reasoning": { + "question": "In dataset train_FD004, plot sensor_measurement1 vs time_in_cycles for unit_number 107", + "reference_answer": "Line chart showing sensor_measurement1 values on y-axis ranging from 445.00 to 518.67 plotted against time_in_cycles in x-axisfor unit 107 in dataset FD004.", + "generated_answer": "The interactive line chart for sensor_measurement1 vs time_in_cycles for unit_number 107 from the train_FD004 dataset has been generated. The chart shows the variation of sensor measurement 1 over time in cycles for the specified unit. 
The HTML file can be viewed at the following URL: file:///Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.html, and the static PNG file is located at /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.png.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot shows all major elements described in the reference answer, including the line chart with sensor_measurement1 values on the y-axis ranging from 445.00 to 518.67 plotted against time_in_cycles in the x-axis for unit 107 in dataset FD004. The plot matches the reference answer description perfectly.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 18, + "score": 1.0, + "reasoning": { + "question": "In dataset train_FD004, plot the variation of sensor_measurement1 over time for unit_number 107", + "reference_answer": "Line chart displaying the variation of sensor_measurement1 values on y-axis over time cycles in x-axis for unit 107 in dataset FD004. The plot should illustrate how sensor_measurement1 changes across different time cycles, demonstrating the temporal variation pattern of this sensor reading.", + "generated_answer": "The variation of sensor_measurement1 over time for unit_number 107 in the train_FD004 dataset has been plotted. The visualization is available in both interactive HTML and static PNG formats. You can view the interactive chart by opening the HTML file located at `/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.html` or by clicking [here](file:///Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.html). The static PNG version of the chart is located at `/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.png`.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot matches the reference answer description perfectly, showing all major elements described, including the variation of sensor_measurement1 values over time cycles for unit 107 in dataset FD004.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_sensor_measurement_1.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 19, + "score": 1.0, + "reasoning": { + "question": "In dataset train_FD002, plot operational_setting_3 vs time_in_cycles for unit_number 200", + "reference_answer": "Line chart showing operational_setting_3 values on y-axis ranging against time_in_cycles in x-axis for unit 200 in dataset FD002. Only two values 100 and 60 should be visible on the plot.", + "generated_answer": "The interactive line chart plotting operational_setting_3 vs time_in_cycles for unit_number 200 from the train_FD002 dataset has been generated. 
You can view the chart by opening the HTML file located at `/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_operational_setting_3.html` or by viewing the static PNG file at `/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_operational_setting_3.png`. The chart shows how operational_setting_3 changes over time_in_cycles for unit_number 200.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot shows all major elements described in the reference answer, including the line chart with operational_setting_3 values on the y-axis and time_in_cycles on the x-axis for unit 200 in dataset FD002. The plot only shows two values, 100 and 60, which matches the reference answer.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/line_chart_time_in_cycles_vs_operational_setting_3.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 20, + "score": 1.0, + "reasoning": { + "question": "Plot a histogram showing distribution of values of operational_setting_3 over time for unit_number 200 in dataset train_FD002", + "reference_answer": "Histogram Two bars for 100 and 60 with higher bar for 100", + "generated_answer": "The distribution histogram for `operational_setting_3` over time for unit_number 200 in the `train_FD002` dataset has been generated. The plot shows the distribution of `operational_setting_3` values. You can view the interactive HTML plot at the following URL: file:///Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_operational_setting_3.html, or open the static PNG file located at /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_operational_setting_3.png.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot shows two bars for 100 and 60, with the higher bar for 100, matching the reference answer description.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_operational_setting_3.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 21, + "score": 1.0, + "reasoning": { + "question": "In dataset test_FD001 plot a histogram showing the distribution of operational_setting_3 across all units", + "reference_answer": "Constant value 100, so just one high bar for 100", + "generated_answer": "Histogram for Operational Setting 3 in test_FD001 generated successfully. 
\nInteractive Version: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_operational_setting_3.html\nStatic Version: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_operational_setting_3.png\n\nThe histogram provides a visual representation of the distribution of operational_setting_3 values across all units in the test_FD001 dataset.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot shows a single high bar for 100, matching the reference answer description of a constant value 100.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_operational_setting_3.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 22, + "score": 0.0, + "reasoning": { + "error": "Unified evaluation failed: [400] Bad Request\nAt most 1 image(s) may be provided in one request.", + "question": "In dataset test_FD001 plot operational_setting_3 as a function of time_in_cycles for units 10, 20, 30, 40", + "reference_answer": "Four constant lines at 100", + "generated_answer": "The line charts for operational_setting_3 as a function of time_in_cycles for units 10, 20, 30, and 40 have been generated. The HTML files are located in the specified output directory:\n\n- Unit 10: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/unit_10_operational_setting_3.html\n- Unit 20: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/unit_20_operational_setting_3.html\n- Unit 30: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/unit_30_operational_setting_3.html\n- Unit 40: /Users/vik0: /Users/vik", + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "ERROR" + } + }, + { + "id": 23, + "score": 1.0, + "reasoning": { + "question": "Retrieve RUL of all units from the FD001 and plot their distribution using a histogram", + "reference_answer": "Histogram showing distribution of RUL values for all units in FD001 dataset. Should contain 100 data points representing different RUL values ranging from 7 to 145 cycles. The distribution should show 71 unique RUL values with varying frequencies. The plot should display the spread and frequency of remaining useful life values across all engine units in the dataset.", + "generated_answer": "The distribution histogram of the RUL for all units in the FD001 dataset has been generated. The plot shows the distribution of Remaining Useful Life values across the 100 engine units. 
The histogram is available in both HTML and PNG formats at the following paths:\n- HTML File: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_RUL.html\n- PNG File: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_RUL.png\n\nYou can view the interactive HTML plot by opening the HTML file in a web browser.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot shows all major elements described in the reference answer, including the distribution of RUL values for all units in FD001 dataset, 100 data points representing different RUL values ranging from 7 to 145 cycles, and 71 unique RUL values with varying frequencies. The plot displays the spread and frequency of remaining useful life values across all engine units in the dataset, matching the reference answer description.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/distribution_plot_RUL.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 24, + "score": 1.0, + "reasoning": { + "question": "Retrieve time in cycles, all sensor measurements and RUL value for engine unit 24 from FD001 test and RUL tables. Predict RUL for it. Finally, generate a plot to compare actual RUL value with predicted RUL value across time.", + "reference_answer": "A Plot showing both actual RUL values and predicted RUL values trend (in y-axis) plotted against time in cycles (in x-axis) for engine unit 24", + "generated_answer": "The comparison plot has been generated and saved in the following locations:\n- HTML File: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/comparison_plot_actual_RUL_vs_predicted_RUL.html\n- PNG File: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/comparison_plot_actual_RUL_vs_predicted_RUL.png\n\nThe plot compares the actual RUL (dashed teal line) with the predicted RUL (solid green line) across time in cycles for engine unit 24. This visualization helps in assessing the accuracy of the RUL predictions.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot shows both actual RUL values and predicted RUL values trend plotted against time in cycles for engine unit 24, matching the reference answer description.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/comparison_plot_actual_RUL_vs_predicted_RUL.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + }, + { + "id": 25, + "score": 1.0, + "reasoning": { + "question": "Retrieve and detect anomalies in sensor 4 measurements for engine number 78.", + "reference_answer": "A Plot showing observed values and anomalies in sensor 4 measurements for engine number 78", + "generated_answer": "The anomaly detection for sensor 4 measurements in engine number 78 has been completed. The results show that out of 72 data points, 4 were identified as anomalies. 
The visualization is available in the following files:\n- Interactive HTML plot: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/anomaly_plot_sensor_measurement_4_engine78.html\n- PNG image: /Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/anomaly_plot_sensor_measurement_4_engine78.png\n\nThe plot displays the observed sensor readings with blue lines and highlights the detected anomalies with red markers. The interactive plot allows for zooming and hovering over data points for detailed inspection.", + "llm_judgment": "EVALUATION TYPE: PLOT - The generated plot matches the reference answer description perfectly, showing all major elements including observed values, anomalies, and interactive features.", + "plot_paths": [ + "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data/anomaly_plot_sensor_measurement_4_engine78.png" + ], + "num_images_analyzed": 1, + "evaluation_type": "PLOT" + } + } + ] +} \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/imgs/intermediate_steps.png b/industries/manufacturing/predictive_maintenance_agent/imgs/intermediate_steps.png new file mode 100644 index 00000000..b0858ae6 Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/imgs/intermediate_steps.png differ diff --git a/industries/manufacturing/predictive_maintenance_agent/imgs/pred_maint_arch_diagram_img1.png b/industries/manufacturing/predictive_maintenance_agent/imgs/pred_maint_arch_diagram_img1.png new file mode 100644 index 00000000..99d5293a Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/imgs/pred_maint_arch_diagram_img1.png differ diff --git a/industries/manufacturing/predictive_maintenance_agent/imgs/pred_maint_arch_diagram_img2.png b/industries/manufacturing/predictive_maintenance_agent/imgs/pred_maint_arch_diagram_img2.png new file mode 100644 index 00000000..714fca15 Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/imgs/pred_maint_arch_diagram_img2.png differ diff --git a/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_1.png b/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_1.png new file mode 100644 index 00000000..06120de7 Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_1.png differ diff --git a/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_2.png b/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_2.png new file mode 100644 index 00000000..83aa8387 Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_2.png differ diff --git a/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_3.png b/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_3.png new file mode 100644 index 00000000..e1fa8fb5 Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/imgs/test_prompt_3.png differ diff --git a/industries/manufacturing/predictive_maintenance_agent/models/scaler_model.pkl b/industries/manufacturing/predictive_maintenance_agent/models/scaler_model.pkl new file mode 100644 index 00000000..00e6785f Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/models/scaler_model.pkl differ diff --git 
a/industries/manufacturing/predictive_maintenance_agent/models/xgb_model_fd001.pkl b/industries/manufacturing/predictive_maintenance_agent/models/xgb_model_fd001.pkl new file mode 100644 index 00000000..9996ec93 Binary files /dev/null and b/industries/manufacturing/predictive_maintenance_agent/models/xgb_model_fd001.pkl differ diff --git a/industries/manufacturing/predictive_maintenance_agent/pyproject.toml b/industries/manufacturing/predictive_maintenance_agent/pyproject.toml new file mode 100644 index 00000000..9cb39b1d --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/pyproject.toml @@ -0,0 +1,42 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools >= 64"] + +[project] +name = "predictive_maintenance_agent" +version = "0.1.0" +dependencies = [ + "aiqtoolkit[profiling, langchain, telemetry]", + "momentfm", + "pydantic ~= 2.10.0, <2.11.0", + "vanna==0.7.9", + "chromadb", + "xgboost", + "matplotlib", + "torch", + "pytest", + "pytest-asyncio" +] +requires-python = ">=3.11,<3.13" +description = "Predictive maintenance workflow using AIQ" +classifiers = ["Programming Language :: Python"] +authors = [{ name = "Vineeth Kalluru" }] +maintainers = [{ name = "NVIDIA Corporation" }] + +[project.entry-points.'aiq.components'] +predictive_maintenance_agent = "predictive_maintenance_agent.register" + +[tool.uv.sources] +momentfm = { path = "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/moment", editable = true } + +[tool.pytest.ini_options] +asyncio_mode = "auto" +markers = [ + "e2e: end-to-end tests that run full workflows", +] +testpaths = [ + ".", +] +python_files = ["test_*.py", "*_test.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] diff --git a/industries/manufacturing/predictive_maintenance_agent/setup_database.py b/industries/manufacturing/predictive_maintenance_agent/setup_database.py new file mode 100644 index 00000000..4bf5ad22 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/setup_database.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python3 +""" +NASA Turbofan Engine Dataset to SQLite Database Converter + +This script converts the NASA Turbofan Engine Degradation Simulation Dataset (C-MAPSS) +from text files into a structured SQLite database for use with the predictive maintenance agent. + +The NASA dataset contains: +- Training data: Engine run-to-failure trajectories +- Test data: Engine trajectories of unknown remaining cycles +- RUL data: Ground truth remaining useful life values + +Dataset structure: +- unit_number: Engine unit identifier +- time_in_cycles: Operational time cycles +- operational_setting_1, 2, 3: Operating conditions +- sensor_measurement_1 to 21: Sensor readings +""" + +import sqlite3 +import pandas as pd +import numpy as np +import os +from pathlib import Path +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +class NASADatasetProcessor: + """Processes NASA Turbofan Engine Dataset and creates SQLite database.""" + + def __init__(self, data_dir: str = "data", db_path: str = "PredM_db/nasa_turbo.db"): + """ + Initialize the processor. 
+ + Args: + data_dir: Directory containing NASA dataset text files + db_path: Path where SQLite database will be created + """ + self.data_dir = Path(data_dir) + self.db_path = Path(db_path) + + # Ensure database directory exists + self.db_path.parent.mkdir(exist_ok=True) + + # Define column names for the dataset + self.columns = [ + 'unit_number', 'time_in_cycles', + 'operational_setting_1', 'operational_setting_2', 'operational_setting_3', + 'sensor_measurement_1', 'sensor_measurement_2', 'sensor_measurement_3', + 'sensor_measurement_4', 'sensor_measurement_5', 'sensor_measurement_6', + 'sensor_measurement_7', 'sensor_measurement_8', 'sensor_measurement_9', + 'sensor_measurement_10', 'sensor_measurement_11', 'sensor_measurement_12', + 'sensor_measurement_13', 'sensor_measurement_14', 'sensor_measurement_15', + 'sensor_measurement_16', 'sensor_measurement_17', 'sensor_measurement_18', + 'sensor_measurement_19', 'sensor_measurement_20', 'sensor_measurement_21' + ] + + # Sensor descriptions for metadata + self.sensor_descriptions = { + 'sensor_measurement_1': 'Total temperature at fan inlet (°R)', + 'sensor_measurement_2': 'Total temperature at LPC outlet (°R)', + 'sensor_measurement_3': 'Total temperature at HPC outlet (°R)', + 'sensor_measurement_4': 'Total temperature at LPT outlet (°R)', + 'sensor_measurement_5': 'Pressure at fan inlet (psia)', + 'sensor_measurement_6': 'Total pressure in bypass-duct (psia)', + 'sensor_measurement_7': 'Total pressure at HPC outlet (psia)', + 'sensor_measurement_8': 'Physical fan speed (rpm)', + 'sensor_measurement_9': 'Physical core speed (rpm)', + 'sensor_measurement_10': 'Engine pressure ratio (P50/P2)', + 'sensor_measurement_11': 'Static pressure at HPC outlet (psia)', + 'sensor_measurement_12': 'Ratio of fuel flow to Ps30 (pps/psi)', + 'sensor_measurement_13': 'Corrected fan speed (rpm)', + 'sensor_measurement_14': 'Corrected core speed (rpm)', + 'sensor_measurement_15': 'Bypass Ratio', + 'sensor_measurement_16': 'Burner fuel-air ratio', + 'sensor_measurement_17': 'Bleed Enthalpy', + 'sensor_measurement_18': 'Required fan speed', + 'sensor_measurement_19': 'Required fan conversion speed', + 'sensor_measurement_20': 'High-pressure turbines Cool air flow', + 'sensor_measurement_21': 'Low-pressure turbines Cool air flow' + } + + def read_data_file(self, file_path: Path) -> pd.DataFrame: + """ + Read a NASA dataset text file and return as DataFrame. 
+ + Args: + file_path: Path to the text file + + Returns: + DataFrame with proper column names + """ + try: + # Read space-separated text file + df = pd.read_csv(file_path, sep='\s+', header=None, names=self.columns) + logger.info(f"Loaded {len(df)} records from {file_path.name}") + return df + except Exception as e: + logger.error(f"Error reading {file_path}: {e}") + return pd.DataFrame() + + def process_training_data(self, conn: sqlite3.Connection): + """Process training data files and create database tables.""" + logger.info("Processing training data...") + + training_files = [ + 'train_FD001.txt', 'train_FD002.txt', 'train_FD003.txt', 'train_FD004.txt' + ] + + all_training_data = [] + + for file_name in training_files: + file_path = self.data_dir / file_name + if file_path.exists(): + df = self.read_data_file(file_path) + if not df.empty: + # Add dataset identifier + df['dataset'] = file_name.replace('train_', '').replace('.txt', '') + + # Calculate RUL for training data (max cycle - current cycle) + df['RUL'] = df.groupby('unit_number')['time_in_cycles'].transform('max') - df['time_in_cycles'] + + all_training_data.append(df) + else: + logger.warning(f"Training file not found: {file_path}") + + if all_training_data: + training_df = pd.concat(all_training_data, ignore_index=True) + training_df.to_sql('training_data', conn, if_exists='replace', index=False) + logger.info(f"Created training_data table with {len(training_df)} records") + + def process_test_data(self, conn: sqlite3.Connection): + """Process test data files and create database tables.""" + logger.info("Processing test data...") + + test_files = [ + 'test_FD001.txt', 'test_FD002.txt', 'test_FD003.txt', 'test_FD004.txt' + ] + + all_test_data = [] + + for file_name in test_files: + file_path = self.data_dir / file_name + if file_path.exists(): + df = self.read_data_file(file_path) + if not df.empty: + # Add dataset identifier + df['dataset'] = file_name.replace('test_', '').replace('.txt', '') + all_test_data.append(df) + else: + logger.warning(f"Test file not found: {file_path}") + + if all_test_data: + test_df = pd.concat(all_test_data, ignore_index=True) + test_df.to_sql('test_data', conn, if_exists='replace', index=False) + logger.info(f"Created test_data table with {len(test_df)} records") + + def process_rul_data(self, conn: sqlite3.Connection): + """Process RUL (Remaining Useful Life) data files.""" + logger.info("Processing RUL data...") + + rul_files = [ + 'RUL_FD001.txt', 'RUL_FD002.txt', 'RUL_FD003.txt', 'RUL_FD004.txt' + ] + + all_rul_data = [] + + for file_name in rul_files: + file_path = self.data_dir / file_name + if file_path.exists(): + try: + # RUL files contain one RUL value per line for each test engine + rul_values = pd.read_csv(file_path, header=None, names=['RUL']) + rul_values['unit_number'] = range(1, len(rul_values) + 1) + rul_values['dataset'] = file_name.replace('RUL_', '').replace('.txt', '') + all_rul_data.append(rul_values[['unit_number', 'dataset', 'RUL']]) + logger.info(f"Loaded {len(rul_values)} RUL values from {file_name}") + except Exception as e: + logger.error(f"Error reading RUL file {file_path}: {e}") + else: + logger.warning(f"RUL file not found: {file_path}") + + if all_rul_data: + rul_df = pd.concat(all_rul_data, ignore_index=True) + rul_df.to_sql('rul_data', conn, if_exists='replace', index=False) + logger.info(f"Created rul_data table with {len(rul_df)} records") + + def create_metadata_tables(self, conn: sqlite3.Connection): + """Create metadata tables with sensor descriptions and 
dataset information.""" + logger.info("Creating metadata tables...") + + # Sensor metadata + sensor_metadata = pd.DataFrame([ + {'sensor_name': sensor, 'description': desc} + for sensor, desc in self.sensor_descriptions.items() + ]) + sensor_metadata.to_sql('sensor_metadata', conn, if_exists='replace', index=False) + + # Dataset metadata + dataset_metadata = pd.DataFrame([ + {'dataset': 'FD001', 'description': 'Sea level conditions', 'fault_modes': 1}, + {'dataset': 'FD002', 'description': 'Sea level conditions', 'fault_modes': 6}, + {'dataset': 'FD003', 'description': 'High altitude conditions', 'fault_modes': 1}, + {'dataset': 'FD004', 'description': 'High altitude conditions', 'fault_modes': 6} + ]) + dataset_metadata.to_sql('dataset_metadata', conn, if_exists='replace', index=False) + + logger.info("Created metadata tables") + + def create_indexes(self, conn: sqlite3.Connection): + """Create database indexes for better query performance.""" + logger.info("Creating database indexes...") + + indexes = [ + "CREATE INDEX IF NOT EXISTS idx_training_unit ON training_data(unit_number)", + "CREATE INDEX IF NOT EXISTS idx_training_dataset ON training_data(dataset)", + "CREATE INDEX IF NOT EXISTS idx_training_cycle ON training_data(time_in_cycles)", + "CREATE INDEX IF NOT EXISTS idx_test_unit ON test_data(unit_number)", + "CREATE INDEX IF NOT EXISTS idx_test_dataset ON test_data(dataset)", + "CREATE INDEX IF NOT EXISTS idx_test_cycle ON test_data(time_in_cycles)", + "CREATE INDEX IF NOT EXISTS idx_rul_unit ON rul_data(unit_number, dataset)" + ] + + for index_sql in indexes: + conn.execute(index_sql) + + conn.commit() + logger.info("Created database indexes") + + def create_views(self, conn: sqlite3.Connection): + """Create useful database views for common queries.""" + logger.info("Creating database views...") + + # View for latest sensor readings per engine + latest_readings_view = """ + CREATE VIEW IF NOT EXISTS latest_sensor_readings AS + SELECT t1.* + FROM training_data t1 + INNER JOIN ( + SELECT unit_number, dataset, MAX(time_in_cycles) as max_cycle + FROM training_data + GROUP BY unit_number, dataset + ) t2 ON t1.unit_number = t2.unit_number + AND t1.dataset = t2.dataset + AND t1.time_in_cycles = t2.max_cycle + """ + + # View for engine health summary + engine_health_view = """ + CREATE VIEW IF NOT EXISTS engine_health_summary AS + SELECT + unit_number, + dataset, + MAX(time_in_cycles) as total_cycles, + MIN(RUL) as final_rul, + AVG(sensor_measurement_1) as avg_fan_inlet_temp, + AVG(sensor_measurement_11) as avg_hpc_outlet_pressure, + AVG(sensor_measurement_21) as avg_lpt_cool_air_flow + FROM training_data + GROUP BY unit_number, dataset + """ + + conn.execute(latest_readings_view) + conn.execute(engine_health_view) + conn.commit() + logger.info("Created database views") + + def validate_database(self, conn: sqlite3.Connection): + """Validate the created database by running sample queries.""" + logger.info("Validating database...") + + validation_queries = [ + ("Training data count", "SELECT COUNT(*) FROM training_data"), + ("Test data count", "SELECT COUNT(*) FROM test_data"), + ("RUL data count", "SELECT COUNT(*) FROM rul_data"), + ("Unique engines in training", "SELECT COUNT(DISTINCT unit_number) FROM training_data"), + ("Datasets available", "SELECT DISTINCT dataset FROM training_data"), + ] + + for description, query in validation_queries: + try: + result = conn.execute(query).fetchone() + logger.info(f"{description}: {result[0] if isinstance(result[0], (int, float)) else result}") + 
except Exception as e: + logger.error(f"Validation query failed - {description}: {e}") + + def process_dataset(self): + """Main method to process the entire NASA dataset.""" + logger.info(f"Starting NASA dataset processing...") + logger.info(f"Data directory: {self.data_dir.absolute()}") + logger.info(f"Database path: {self.db_path.absolute()}") + + # Check if data directory exists + if not self.data_dir.exists(): + logger.error(f"Data directory not found: {self.data_dir}") + logger.info("Please download the NASA Turbofan Engine Degradation Simulation Dataset") + logger.info("and place the text files in the 'data' directory") + return False + + try: + # Connect to SQLite database + with sqlite3.connect(self.db_path) as conn: + logger.info(f"Connected to database: {self.db_path}") + + # Process all data files + self.process_training_data(conn) + self.process_test_data(conn) + self.process_rul_data(conn) + self.create_metadata_tables(conn) + self.create_indexes(conn) + self.create_views(conn) + + # Validate the database + self.validate_database(conn) + + logger.info("Database processing completed successfully!") + return True + + except Exception as e: + logger.error(f"Error processing database: {e}") + return False + +def main(): + """Main function to run the database setup.""" + import argparse + + parser = argparse.ArgumentParser(description="Convert NASA Turbofan Dataset to SQLite") + parser.add_argument("--data-dir", default="data", + help="Directory containing NASA dataset text files") + parser.add_argument("--db-path", default="PredM_db/nasa_turbo.db", + help="Path for output SQLite database") + + args = parser.parse_args() + + processor = NASADatasetProcessor(args.data_dir, args.db_path) + success = processor.process_dataset() + + if success: + print(f"\n✅ Database created successfully at: {args.db_path}") + print("\nDatabase contains the following tables:") + print("- training_data: Engine run-to-failure trajectories") + print("- test_data: Engine test trajectories") + print("- rul_data: Ground truth RUL values") + print("- sensor_metadata: Sensor descriptions") + print("- dataset_metadata: Dataset information") + print("\nUseful views created:") + print("- latest_sensor_readings: Latest readings per engine") + print("- engine_health_summary: Engine health statistics") + else: + print("\n❌ Database creation failed. Check the logs above.") + return 1 + + return 0 + +if __name__ == "__main__": + exit(main()) \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/__init__.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/__init__.py new file mode 100644 index 00000000..284af8a6 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/__init__.py @@ -0,0 +1,14 @@ +""" +Evaluators package for predictive maintenance agent. + +This package contains evaluator implementations for assessing the quality +of responses from the predictive maintenance agent workflow. 
+""" + +from .llm_judge_evaluator import LLMJudgeEvaluator +from .multimodal_llm_judge_evaluator import MultimodalLLMJudgeEvaluator + +__all__ = [ + "LLMJudgeEvaluator", + "MultimodalLLMJudgeEvaluator", +] \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/llm_judge_evaluator.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/llm_judge_evaluator.py new file mode 100644 index 00000000..33beb372 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/llm_judge_evaluator.py @@ -0,0 +1,166 @@ +import asyncio +import logging +from typing import Any, Dict, Union + +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import HumanMessage +from langchain_core.prompts import ChatPromptTemplate + +from aiq.eval.evaluator.base_evaluator import BaseEvaluator +from aiq.eval.evaluator.evaluator_model import EvalInputItem, EvalOutputItem + +logger = logging.getLogger(__name__) + + +class LLMJudgeEvaluator(BaseEvaluator): + """ + LLM-as-a-Judge evaluator that uses a large language model to evaluate + how well the generated response matches the reference answer. + """ + + def __init__( + self, + llm: BaseChatModel, + judge_prompt: str, + max_concurrency: int = 4, + ): + super().__init__(max_concurrency=max_concurrency, tqdm_desc="LLM Judge Evaluating") + self.llm = llm + self.judge_prompt = judge_prompt + + # Create the prompt template + self.prompt_template = ChatPromptTemplate.from_template(self.judge_prompt) + logger.debug("LLM Judge evaluator initialized with custom prompt.") + + async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem: + """ + Evaluate a single EvalInputItem using LLM-as-a-judge. + + The judge_prompt should contain placeholders for: + - {question}: The original question/input + - {reference_answer}: The expected/reference answer + - {generated_answer}: The model's generated answer + + The LLM should return a JSON object with 'score' and 'reasoning' fields. 
+ """ + question = str(item.input_obj) if item.input_obj else "" + reference_answer = str(item.expected_output_obj) if item.expected_output_obj else "" + generated_answer = str(item.output_obj) if item.output_obj else "" + + try: + # Format the prompt with the actual values + messages = self.prompt_template.format_messages( + question=question, + reference_answer=reference_answer, + generated_answer=generated_answer + ) + + # Get LLM response + response = await self.llm.ainvoke(messages) + response_text = response.content + + # Try to parse the response as JSON + try: + import json + import re + + # First try to parse as direct JSON + eval_result = json.loads(response_text) + + except json.JSONDecodeError: + # If direct JSON parsing fails, try to extract JSON from markdown code blocks + try: + # Look for JSON within markdown code blocks (```json or just ```) + json_pattern = r'```(?:json)?\s*(\{.*?\})\s*```' + json_match = re.search(json_pattern, response_text, re.DOTALL) + + if json_match: + json_str = json_match.group(1) + eval_result = json.loads(json_str) + else: + # If no code blocks found, fall back to text extraction + raise json.JSONDecodeError("No JSON code blocks found", "", 0) + + except json.JSONDecodeError: + # Final fallback to text-based score extraction + score = self._extract_score_from_text(response_text) + reasoning = response_text + eval_result = None + + # Process the parsed JSON result + if eval_result is not None: + if isinstance(eval_result, dict) and 'score' in eval_result: + score = eval_result.get('score', 0.0) + reasoning = eval_result.get('reasoning', response_text) + else: + # If not proper JSON format, try to extract score from text + score = self._extract_score_from_text(response_text) + reasoning = response_text + + # Ensure score is numeric and between 0 and 1 + if isinstance(score, (int, float)): + score = max(0.0, min(1.0, float(score))) + else: + score = 0.0 + reasoning = f"Could not parse score from LLM response: {response_text}" + + return EvalOutputItem( + id=item.id, + score=score, + reasoning={ + "question": question, + "reference_answer": reference_answer, + "generated_answer": generated_answer, + "llm_judgment": reasoning, + "raw_response": response_text + } + ) + + except Exception as e: + logger.exception("Error evaluating item %s: %s", item.id, e) + return EvalOutputItem( + id=item.id, + score=0.0, + reasoning={ + "error": f"LLM evaluation failed: {str(e)}", + "question": question, + "reference_answer": reference_answer, + "generated_answer": generated_answer + } + ) + + def _extract_score_from_text(self, text: str) -> float: + """ + Extract a numeric score from text response if JSON parsing fails. 
+ Looks for patterns like "Score: 0.8" or "8/10" or "80%" + """ + import re + + # Try to find score patterns in the text + patterns = [ + r'"?score"?[:\s]*([0-9]*\.?[0-9]+)', # "score": 0.8, score: 0.8, or score 0.8 + r'([0-9]*\.?[0-9]+)[/\s]*10', # "8/10" or "8 out of 10" + r'([0-9]*\.?[0-9]+)%', # "80%" + r'([0-9]*\.?[0-9]+)[/\s]*100', # "80/100" or "80 out of 100" + ] + + for pattern in patterns: + match = re.search(pattern, text.lower()) + if match: + try: + value = float(match.group(1)) + + # Normalize different scales to 0-1 range + if '/10' in pattern: + return value / 10.0 + elif '%' in pattern or '/100' in pattern: + return value / 100.0 + else: + # Assume it's already in 0-1 range, but clamp it + return max(0.0, min(1.0, value)) + except ValueError: + continue + + # Default to 0.0 if no score found + logger.warning("Could not extract score from text: %s", text) + return 0.0 \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/llm_judge_evaluator_register.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/llm_judge_evaluator_register.py new file mode 100644 index 00000000..462b2ac6 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/llm_judge_evaluator_register.py @@ -0,0 +1,66 @@ +from pydantic import Field + +from aiq.builder.builder import EvalBuilder +from aiq.builder.evaluator import EvaluatorInfo +from aiq.cli.register_workflow import register_evaluator +from aiq.data_models.evaluator import EvaluatorBaseConfig + + +class LLMJudgeEvaluatorConfig(EvaluatorBaseConfig, name="llm_judge"): + """Configuration for LLM-as-a-Judge evaluator.""" + + llm_name: str = Field(description="Name of the LLM to use as judge") + judge_prompt: str = Field( + description="Prompt template for the judge LLM. Should include {question}, {reference_answer}, and {generated_answer} placeholders", + default="""You are an expert evaluator for predictive maintenance systems. Your task is to evaluate how well a generated answer matches the reference answer for a given question. + +Question: {question} + +Reference Answer: {reference_answer} + +Generated Answer: {generated_answer} + +Please evaluate the generated answer against the reference answer considering: +1. Factual accuracy and correctness +2. Completeness of the response +3. Technical accuracy for predictive maintenance context +4. 
Relevance to the question asked + +Provide your evaluation as a JSON object with the following format: +{{ + "score": <score between 0.0 and 1.0>, + "reasoning": "<brief explanation for the score>" +}} + +The score should be: +- 1.0: Perfect match, completely accurate and complete +- 0.8-0.9: Very good, minor differences but essentially correct +- 0.6-0.7: Good, mostly correct with some inaccuracies or missing details +- 0.4-0.5: Fair, partially correct but with significant issues +- 0.2-0.3: Poor, mostly incorrect but some relevant information +- 0.0-0.1: Very poor, completely incorrect or irrelevant""" + ) + + +@register_evaluator(config_type=LLMJudgeEvaluatorConfig) +async def register_llm_judge_evaluator(config: LLMJudgeEvaluatorConfig, builder: EvalBuilder): + """Register the LLM Judge evaluator with AIQ Toolkit.""" + from aiq.builder.framework_enum import LLMFrameworkEnum + + from .llm_judge_evaluator import LLMJudgeEvaluator + + # Get the LLM instance + llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + # Create the evaluator instance + evaluator = LLMJudgeEvaluator( + llm=llm, + judge_prompt=config.judge_prompt, + max_concurrency=builder.get_max_concurrency() + ) + + yield EvaluatorInfo( + config=config, + evaluate_fn=evaluator.evaluate, + description="LLM-as-a-Judge Evaluator for Predictive Maintenance" + ) \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/multimodal_llm_judge_evaluator.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/multimodal_llm_judge_evaluator.py new file mode 100644 index 00000000..590d5fab --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/multimodal_llm_judge_evaluator.py @@ -0,0 +1,412 @@ +""" +Multimodal LLM Judge Evaluator + +An enhanced evaluator that uses llama-3.2-90b-instruct to evaluate both text and visual outputs +from agentic workflows. This evaluator is specifically designed for predictive maintenance +responses that may include plots and visualizations. +""" + +import asyncio +import logging +import os +import re +from typing import Any, Dict, Union, Optional +from pathlib import Path + +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import HumanMessage +from langchain_core.prompts import ChatPromptTemplate + +from aiq.eval.evaluator.base_evaluator import BaseEvaluator +from aiq.eval.evaluator.evaluator_model import EvalInputItem, EvalOutputItem + +try: + from PIL import Image + import base64 + from io import BytesIO + HAS_PIL = True +except ImportError: + HAS_PIL = False + logging.warning("PIL not available. Image evaluation will be disabled.") + +logger = logging.getLogger(__name__) + + +class MultimodalLLMJudgeEvaluator(BaseEvaluator): + """ + Enhanced multimodal LLM Judge evaluator using llama-3.2-90b-instruct that can evaluate + responses containing both text and visual elements (plots). + + This evaluator automatically detects plot paths in responses and includes + visual analysis in the evaluation process using a unified prompt. 
+ """ + + def __init__( + self, + llm: BaseChatModel, + judge_prompt: str, + max_concurrency: int = 4, + ): + super().__init__(max_concurrency=max_concurrency, tqdm_desc="Multimodal LLM Judge Evaluating") + self.llm = llm + self.judge_prompt = judge_prompt + + # Create the prompt template + self.prompt_template = ChatPromptTemplate.from_template(self.judge_prompt) + + logger.debug("Multimodal LLM Judge evaluator initialized.") + logger.debug(f"Model: llama-3.2-90b-instruct") + + @classmethod + def from_config( + cls, + llm: BaseChatModel, + judge_prompt: str, + max_concurrency: int = 4, + **kwargs + ): + """Create MultimodalLLMJudgeEvaluator from configuration parameters.""" + return cls( + llm=llm, + judge_prompt=judge_prompt, + max_concurrency=max_concurrency + ) + + async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem: + """ + Evaluate a single EvalInputItem that may contain text and/or visual elements. + + This method uses a unified evaluation approach that handles both text-only + and text+visual responses with a single comprehensive prompt. + """ + question = str(item.input_obj) if item.input_obj else "" + reference_answer = str(item.expected_output_obj) if item.expected_output_obj else "" + generated_answer = str(item.output_obj) if item.output_obj else "" + + try: + # Check if the response contains plots + plot_paths = self._extract_plot_paths(generated_answer) + + # Use unified evaluation for both text-only and text+visual responses + return await self._evaluate_unified( + item, question, reference_answer, generated_answer, plot_paths + ) + + except Exception as e: + logger.exception("Error evaluating item %s: %s", item.id, e) + return EvalOutputItem( + id=item.id, + score=0.0, + reasoning={ + "error": f"Evaluation failed: {str(e)}", + "question": question, + "reference_answer": reference_answer, + "generated_answer": generated_answer, + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "ERROR" + } + ) + + def _extract_plot_paths(self, response: str) -> list[str]: + """Extract all PNG file paths from the generated response.""" + plot_paths = [] + + # Look for PNG file paths in the response with improved patterns + png_patterns = [ + r'([^\s\[\]]+\.png)', # Original pattern but excluding brackets + r'([/][^\s\[\]]+\.png)', # Paths starting with / + r'([A-Za-z]:[^\s\[\]]+\.png)', # Windows paths starting with drive letter + r'file://([^\s\[\]]+\.png)', # file:// URLs + r'\[([^\[\]]+\.png)\]', # Paths inside square brackets + r'located at ([^\s]+\.png)', # "located at path.png" pattern + r'saved.*?([/][^\s]+\.png)', # "saved at /path.png" pattern + ] + + for pattern in png_patterns: + matches = re.findall(pattern, response) + for match in matches: + # Clean up the match - remove any trailing punctuation + clean_match = match.rstrip('.,;:!?)]') + # Check if the file actually exists + if os.path.exists(clean_match): + plot_paths.append(clean_match) + + # Also look for responses that mention plot/chart generation even if file doesn't exist + # This helps with cases where files are generated after response but before evaluation + plot_indicators = [ + r'plot.*generated', r'chart.*generated', r'histogram.*generated', + r'visualization.*generated', r'\.png.*generated', r'plot.*saved', + r'chart.*saved', r'saved.*\.png' + ] + + has_plot_indicator = any(re.search(indicator, response, re.IGNORECASE) + for indicator in plot_indicators) + + # If we detect plot generation language but no existing files, + # try to find PNG files in the output_data directory that 
might be related + if has_plot_indicator and not plot_paths: + output_dir = "/Users/vikalluru/Documents/GenerativeAIExamples/industries/manufacturing/predictive_maintenance_agent/output_data" + if os.path.exists(output_dir): + png_files = [f for f in os.listdir(output_dir) if f.endswith('.png')] + # Add the most recently modified PNG files + for png_file in png_files[-3:]: # Last 3 PNG files as a heuristic + full_path = os.path.join(output_dir, png_file) + plot_paths.append(full_path) + + return list(set(plot_paths)) # Remove duplicates + + async def _evaluate_unified( + self, + item: EvalInputItem, + question: str, + reference_answer: str, + generated_answer: str, + plot_paths: list[str] + ) -> EvalOutputItem: + """ + Unified evaluation method that handles both text-only and text+visual responses. + Uses a single comprehensive prompt that works for both scenarios. + """ + try: + # Load and encode images if plot paths are provided + image_data_list = [] + valid_plot_paths = [] + + if plot_paths and HAS_PIL: + for plot_path in plot_paths: + image_data = self._load_and_encode_image(plot_path) + if image_data: + image_data_list.append(image_data) + valid_plot_paths.append(plot_path) + + # Determine evaluation type based on whether we have valid images + has_visuals = len(image_data_list) > 0 + evaluation_type = "multimodal" if has_visuals else "text_only" + + logger.info(f"Evaluation for item {item.id}: has_visuals={has_visuals}, plot_paths={plot_paths}, valid_plot_paths={valid_plot_paths}, image_data_count={len(image_data_list)}") + + # Use the configured judge_prompt and add explicit evaluation mode instruction + prompt_text = self.judge_prompt.format( + question=question, + reference_answer=reference_answer, + generated_answer=generated_answer + ) + + # Add explicit instruction based on whether we have visuals + if has_visuals: + prompt_text += f"\n\n🚨 CRITICAL OVERRIDE 🚨\nYou can see {len(image_data_list)} plot image(s) attached to this message.\nYou MUST respond with 'EVALUATION TYPE: PLOT' and evaluate the attached images against the reference description.\nIGNORE any text analysis - focus ONLY on the visual plot content." + logger.info(f"Using PLOT evaluation mode for item {item.id} with {len(image_data_list)} images") + else: + prompt_text += "\n\n🚨 CRITICAL OVERRIDE 🚨\nNo images are attached to this message.\nYou MUST respond with 'EVALUATION TYPE: TEXT' and evaluate only the text content.\nDo NOT attempt plot evaluation." 
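+ # Note: these override strings pin the judge's reply to the "EVALUATION TYPE: TEXT" /
+ # "EVALUATION TYPE: PLOT" prefixes that open the "llm_judgment" entries in the saved
+ # evaluation results above, so the mode used for each item can be audited from the
+ # output JSON alone.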
+ logger.info(f"Using TEXT evaluation mode for item {item.id}") + + # Call LLM using LangChain + if has_visuals: + # Call with images using LangChain multimodal capability + response_text = await self._call_visual_api_langchain( + prompt_text, image_data_list + ) + else: + # Call without images (text-only) + response_text = await self._call_api_langchain( + question, reference_answer, generated_answer + ) + + # Parse the response + logger.info(f"LLM response for item {item.id}: {response_text[:200]}...") + score, reasoning = self._parse_evaluation_response(response_text) + + # Build reasoning object + reasoning_obj = { + "question": question, + "reference_answer": reference_answer, + "generated_answer": generated_answer, + "llm_judgment": reasoning, + "plot_paths": valid_plot_paths, + "num_images_analyzed": len(image_data_list), + "evaluation_type": "PLOT" if has_visuals else "TEXT" + } + + return EvalOutputItem( + id=item.id, + score=score, + reasoning=reasoning_obj + ) + + except Exception as e: + logger.exception("Error in unified evaluation for item %s: %s", item.id, e) + return EvalOutputItem( + id=item.id, + score=0.0, + reasoning={ + "error": f"Unified evaluation failed: {str(e)}", + "question": question, + "reference_answer": reference_answer, + "generated_answer": generated_answer, + "plot_paths": [], + "num_images_analyzed": 0, + "evaluation_type": "ERROR" + } + ) + + async def _call_api_langchain( + self, + question: str, + reference_answer: str, + generated_answer: str + ) -> str: + """Call the API using LangChain for text-only evaluation.""" + messages = self.prompt_template.format_messages( + question=question, + reference_answer=reference_answer, + generated_answer=generated_answer + ) + + response = await self.llm.ainvoke(messages) + return response.content + + async def _call_visual_api_langchain( + self, + prompt_text: str, + image_data_list: list[str] + ) -> str: + """Call the API using LangChain for visual evaluation with multiple images.""" + # Create content with text and all images + content = [ + { + "type": "text", + "text": prompt_text + } + ] + + # Add all images to the content + for image_data in image_data_list: + content.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{image_data}" + } + }) + + messages = [ + HumanMessage(content=content) + ] + + response = await self.llm.ainvoke(messages) + return response.content + + def _load_and_encode_image(self, image_path: str) -> Optional[str]: + """Load an image file and encode it as base64.""" + try: + with Image.open(image_path) as img: + # Convert to RGB if necessary + if img.mode != 'RGB': + img = img.convert('RGB') + + # Save to bytes buffer + buffer = BytesIO() + img.save(buffer, format='PNG') + buffer.seek(0) + + # Encode as base64 + image_data = base64.b64encode(buffer.getvalue()).decode('utf-8') + return image_data + + except Exception as e: + logger.exception("Error loading image from %s: %s", image_path, e) + return None + + def _parse_evaluation_response(self, response_text: str) -> tuple[float, str]: + """Parse the evaluation response and extract score and reasoning.""" + try: + import json + + # First try to parse as direct JSON + eval_result = json.loads(response_text) + + except json.JSONDecodeError: + # If direct JSON parsing fails, try to extract JSON from markdown code blocks + try: + # Look for JSON within markdown code blocks (```json or just ```) + json_pattern = r'```(?:json)?\s*(\{.*?\})\s*```' + json_match = re.search(json_pattern, response_text, re.DOTALL) + 
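+ # Illustrative shape of a fenced response this fallback is meant to recover
+ # (hypothetical model output, not taken from a real run):
+ #   ```json
+ #   {"score": 1.0, "reasoning": "Plot matches the reference description."}
+ #   ```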
+ if json_match: + json_str = json_match.group(1) + eval_result = json.loads(json_str) + else: + # If no code blocks found, fall back to text extraction + raise json.JSONDecodeError("No JSON code blocks found", "", 0) + + except json.JSONDecodeError: + # Final fallback to text-based score extraction + score = self._extract_score_from_text(response_text) + reasoning = response_text + return score, reasoning + + # Process the parsed JSON result + if isinstance(eval_result, dict) and 'score' in eval_result: + score = eval_result.get('score', 0.0) + reasoning = eval_result.get('reasoning', response_text) + else: + # If not proper JSON format, try to extract score from text + score = self._extract_score_from_text(response_text) + reasoning = response_text + + # Ensure score is valid (0.0, 0.5, or 1.0) + if isinstance(score, (int, float)): + # Round to nearest valid score + if score <= 0.25: + score = 0.0 + elif score <= 0.75: + score = 0.5 + else: + score = 1.0 + else: + score = 0.0 + reasoning = f"Could not parse score from LLM response: {response_text}" + + return score, reasoning + + def _extract_score_from_text(self, text: str) -> float: + """ + Extract a numeric score from text response if JSON parsing fails. + Looks for patterns like "Score: 0.8", "8/10", or "80%" and maps them to 0.0, 0.5, or 1.0. + """ + import re + + # Each pattern is paired with the divisor that normalizes the captured value to the 0-1 range. + # "/100" is tried before "/10" so that "80/100" is not misread as "80/10". + patterns = [ + (r'"?score"?[:\s]*([0-9]*\.?[0-9]+)', 1.0), # "score": 0.8, score: 0.8, or score 0.8 + (r'([0-9]*\.?[0-9]+)[/\s]*100', 100.0), # "80/100" or "80 out of 100" + (r'([0-9]*\.?[0-9]+)[/\s]*10', 10.0), # "8/10" or "8 out of 10" + (r'([0-9]*\.?[0-9]+)%', 100.0), # "80%" + ] + + for pattern, scale in patterns: + match = re.search(pattern, text.lower()) + if match: + try: + # Normalize to 0-1, then snap to the nearest valid score + value = float(match.group(1)) / scale + if value <= 0.25: + return 0.0 + elif value <= 0.75: + return 0.5 + else: + return 1.0 + + except ValueError: + continue + + # Default to 0.0 if no score found + logger.warning("Could not extract score from text: %s", text) + return 0.0 \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/multimodal_llm_judge_evaluator_register.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/multimodal_llm_judge_evaluator_register.py new file mode 100644 index 00000000..3f6fec70 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/evaluators/multimodal_llm_judge_evaluator_register.py @@ -0,0 +1,85 @@ +from pydantic import Field + +from aiq.builder.builder import EvalBuilder +from aiq.builder.evaluator import EvaluatorInfo +from aiq.cli.register_workflow import register_evaluator +from aiq.data_models.evaluator import EvaluatorBaseConfig + + +class MultimodalLLMJudgeEvaluatorConfig(EvaluatorBaseConfig, name="multimodal_llm_judge_evaluator"): + """Configuration for Multimodal LLM Judge evaluator with text and visual evaluation capabilities.""" + + llm_name: str = Field(description="Name of the LLM to use as judge (should support vision for multimodal evaluation)") + judge_prompt: str = Field( + description="Prompt template for the judge LLM. Should include {question}, {reference_answer}, and {generated_answer} placeholders. 
This prompt works for both text-only and multimodal evaluation.", + default="""You are an expert evaluator for predictive maintenance agentic workflows. Your task is to evaluate how well a generated response (which may include both text and visualizations) matches the reference answer for a given question. + +Question: {question} + +Reference Answer: {reference_answer} + +Generated Response: {generated_answer} + +Please evaluate the complete response considering: + +TEXT EVALUATION: +1. Factual accuracy and correctness of technical information +2. Completeness of the response (does it answer all parts of the question?) +3. Technical accuracy for predictive maintenance context (RUL predictions, sensor data analysis, etc.) +4. Appropriate use of predictive maintenance terminology and concepts + +VISUAL EVALUATION (if plots/charts are present): +1. Does the visualization show the correct data/variables as specified in the reference? +2. Are the axes labeled correctly and with appropriate ranges? +3. Does the plot type (line chart, bar chart, distribution, etc.) match what was requested? +4. Are the data values, trends, and patterns approximately correct? +5. Is the visualization clear and appropriate for predictive maintenance analysis? +6. Does the plot help answer the original question effectively? + +COMBINED EVALUATION: +1. Do the text and visual elements complement each other appropriately? +2. Does the overall response provide a complete answer? +3. Is the combination more helpful than text or visuals alone would be? + +For predictive maintenance context, pay special attention to: +- RUL (Remaining Useful Life) predictions and trends +- Sensor data patterns and operational settings +- Time-series data representation +- Unit/engine-specific data filtering +- Dataset context (FD001, FD002, etc.) 
+ +Provide your evaluation as a JSON object with the following format: +{{ + "score": <0.0, 0.5, or 1.0>, + "reasoning": "<brief justification for the score>" +}} + +The score should be: +- 1.0: Completely correct response - text and any visuals match reference accurately, comprehensive and helpful +- 0.5: Partially correct response - some elements correct but significant issues in text or visuals +- 0.0: Completely wrong response - major errors in text or visuals that make the response unhelpful""" + ) + + +@register_evaluator(config_type=MultimodalLLMJudgeEvaluatorConfig) +async def register_multimodal_llm_judge_evaluator(config: MultimodalLLMJudgeEvaluatorConfig, builder: EvalBuilder): + """Register the Multimodal LLM Judge evaluator with AIQ Toolkit.""" + from aiq.builder.framework_enum import LLMFrameworkEnum + + from .multimodal_llm_judge_evaluator import MultimodalLLMJudgeEvaluator + + # Get the LLM instance + llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + # Create the evaluator instance + evaluator = MultimodalLLMJudgeEvaluator( + llm=llm, + judge_prompt=config.judge_prompt, + max_concurrency=builder.get_max_concurrency() + ) + + yield EvaluatorInfo( + config=config, + evaluate_fn=evaluator.evaluate, + description="Multimodal LLM Judge Evaluator with Text and Visual Evaluation Capabilities for Predictive Maintenance" + ) \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/__init__.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/__init__.py new file mode 100644 index 00000000..16bfc5e4 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/__init__.py @@ -0,0 +1,21 @@ +""" +Plotting package for predictive maintenance agent. + +This package contains components for data visualization, plotting tools, +and code generation assistance for predictive maintenance workflows. +""" + +from . import plot_comparison_tool +from . import plot_distribution_tool +from . import plot_line_chart_tool +from . import plot_anomaly_tool +from . import code_generation_assistant +from .plot_utils import * + +__all__ = [ + "plot_comparison_tool", + "plot_distribution_tool", + "plot_line_chart_tool", + "plot_anomaly_tool", + "code_generation_assistant", +] \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/code_generation_assistant.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/code_generation_assistant.py new file mode 100644 index 00000000..7954b99c --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/code_generation_assistant.py @@ -0,0 +1,329 @@ +import logging +from typing import Any, Dict + +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from aiq.data_models.component_ref import LLMRef, FunctionRef +from aiq.builder.framework_enum import LLMFrameworkEnum + +logger = logging.getLogger(__name__) + + +class CodeGenerationAssistantConfig(FunctionBaseConfig, name="code_generation_assistant"): + """ + AIQ Toolkit function to generate and execute Python code based on input instructions and context. 
+ This tool combines code generation with direct execution, returning results and any generated files. + """ + llm_name: LLMRef = Field(description="The LLM to use for code generation") + code_execution_tool: FunctionRef = Field(description="The code execution tool to run generated code") + output_folder: str = Field(description="The path to the output folder for generated files", default="./output_data") + verbose: bool = Field(description="Enable verbose logging", default=True) + + +@register_function(config_type=CodeGenerationAssistantConfig, framework_wrappers=[LLMFrameworkEnum.LANGCHAIN]) +async def code_generation_assistant( + config: CodeGenerationAssistantConfig, builder: Builder +): + class CodeGenerationInputSchema(BaseModel): + instructions: str = Field(description="Complete instructions including context, data information, and requirements for the code to be generated") + + # Get the LLM and code execution tool from builder + llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + code_execution_fn = builder.get_function(config.code_execution_tool) + + async def _generate_and_execute_code( + instructions: str + ) -> str: + """ + Generate and execute code based on complete instructions. + + Args: + instructions: Complete instructions including context, data information, and requirements for what the code should do + + Returns: + String containing execution results and summary + """ + + system_prompt = """You are an expert Python developer. Generate MINIMAL, EFFICIENT code. + +**CRITICAL OUTPUT REQUIREMENT:** +OUTPUT ONLY THE CODE. NO COMMENTS. NO DOCSTRINGS. NO EXPLANATIONS. +Generate only the code needed. Your response must contain ONLY executable Python code which will be DIRECTLY EXECUTED IN A SANDBOX. + +**CODE REQUIREMENTS:** +1. Generate COMPLETE, SYNTACTICALLY CORRECT Python code +2. ALWAYS finish the complete code - never stop mid-statement +3. EVERY if/elif statement MUST have a complete return statement or action +4. NO comments, NO docstrings, NO explanations +5. Use minimal variable names (df, fig, data, etc.) +6. The working directory is already set to: {output_folder} +7. Use relative paths like "./filename" for file operations +8. For data analysis: use pandas and plotly +9. For visualizations: save as HTML with fig.write_html() +10. For data: save as JSON with to_json() + +**MANDATORY COMPLETION:** +Every script MUST end with file saving and print statement: +```python +fig.write_html('./filename.html') +print(f"Successfully saved file to: filename.html") +``` + +GENERATE CODE ONLY. NO COMMENTS. 
NO EXPLANATIONS.""" + + user_prompt = """**INSTRUCTIONS:** +{instructions} + +**IMPORTANT FILE PATH HANDLING:** +- Input files mentioned in instructions should be accessed using ONLY the filename (e.g., "data.json") +- All files are available in the current working directory +- Use "./filename" pattern for all file operations + +Generate the Python code that fulfills these instructions.""" + + if config.verbose: + logger.info(f"Generating code with instructions: {instructions}") + + try: + from langchain_core.prompts.chat import ChatPromptTemplate + + # Create prompt template following the existing pattern + prompt = ChatPromptTemplate.from_messages([("system", system_prompt), ("user", user_prompt)]) + coding_chain = prompt | llm + + # Generate code using the LLM with proper parameter passing + response = await coding_chain.ainvoke({ + "output_folder": config.output_folder, + "instructions": instructions + }) + + # Clean up the response to extract just the code + raw_code = response.content.strip() if hasattr(response, 'content') else str(response).strip() + code = _clean_generated_code(raw_code) + + if config.verbose: + logger.info(f"Generated code length: {len(code)} characters") + logger.info(f"Generated code:\n{code}") + + # Check if code appears to be truncated and request completion + is_truncated = (not code.endswith(')') and not code.endswith('"') and + not code.endswith("'") and not code.endswith(';')) + has_incomplete_fig_write = 'fig.write' in code and not 'fig.write_html(' in code + + if is_truncated or has_incomplete_fig_write: + logger.warning("Generated code appears to be incomplete. Requesting completion from LLM...") + logger.warning(f"Code ends with: '{code[-100:]}'") + + # Create a completion prompt + completion_prompt = f"""The following Python code was generated but appears to be incomplete: + +```python +{code} +``` + +Please complete ONLY the remaining code that's missing. Do not repeat the existing code, just provide the completion starting from where it left off. Ensure you complete any unfinished statements and add proper file saving and print statements. + +Requirements: +- Complete any unfinished lines or statements +- If there's a visualization (fig), ensure it's saved with fig.write_html('./filename.html') +- If there's data output, save with appropriate method (e.g., to_json()) +- Add a print statement showing the saved file +- Do not include any explanations, just the completion code + +Completion:""" + + try: + # Request completion from LLM + completion_response = await llm.ainvoke(completion_prompt) + raw_completion = completion_response.content.strip() if hasattr(completion_response, 'content') else str(completion_response).strip() + completion_code = _clean_generated_code(raw_completion) + + # Append completion to original code + if completion_code: + code = code + "\n" + completion_code + logger.info(f"Code completion added. 
New total length: {len(code)} characters") + if config.verbose: + logger.info(f"Added completion:\n{completion_code}") + + except Exception as e: + logger.error(f"Failed to get code completion: {e}") + # Fallback to simple auto-fix for fig.write + if code.endswith('fig.write'): + code += f"_html('./plot.html')\nprint(f'Plot saved to: plot.html')" + logger.info("Applied fallback auto-fix for fig.write statement") + + # Execute the generated code + if config.verbose: + logger.info("Executing generated code...") + + execution_result = await code_execution_fn.acall_invoke(generated_code=code) + + if config.verbose: + logger.info(f"Code execution completed with status: {execution_result.get('process_status', 'unknown')}") + logger.info(f"Execution output: {execution_result}") + + # Parse execution result and create clean response + process_status = execution_result.get('process_status', 'unknown') + raw_stdout = execution_result.get('stdout', '') + stderr = execution_result.get('stderr', '') + + # Handle nested JSON in stdout and check for actual execution errors + actual_execution_failed = False + try: + if raw_stdout.startswith('{"') and raw_stdout.endswith('}\n'): + import json + nested_result = json.loads(raw_stdout.strip()) + nested_status = nested_result.get('process_status', '') + actual_stdout = nested_result.get('stdout', '') + actual_stderr = nested_result.get('stderr', '') + + # Check if the nested execution actually failed + if nested_status == 'error' or actual_stderr: + actual_execution_failed = True + process_status = 'error' # Override the outer status + if config.verbose: + logger.warning(f"Detected nested execution error: {actual_stderr}") + else: + actual_stdout = raw_stdout + actual_stderr = stderr + except: + actual_stdout = raw_stdout + actual_stderr = stderr + + # Extract generated files from output + generated_files = _extract_file_paths(actual_stdout, config.output_folder) + + # Create clean string response following the codebase pattern + if process_status in ['completed', 'success'] and not actual_execution_failed: + file_count = len(generated_files) + if file_count > 0: + file_list = ', '.join([f.split('/')[-1] for f in generated_files]) + response = f"Code executed successfully. Generated {file_count} file(s): {file_list}" + else: + response = "Code executed successfully." + + if actual_stdout: + # Clean and add output info + clean_output = actual_stdout.strip().replace('\n', ' ') + response += f"\n\nOutput: {clean_output}" + else: + response = f"Code execution failed with status: {process_status}" + if actual_stderr: + clean_error = actual_stderr.strip().replace('\n', ' ') + response += f"\nError: {clean_error}" + if actual_stdout: + clean_output = actual_stdout.strip().replace('\n', ' ') + response += f"\nOutput: {clean_output}" + + logger.info(f"Code generation assistant response: {response}") + return response + + except Exception as e: + logger.error(f"Error in code generation and execution: {e}") + return f"Error in code generation and execution: {str(e)}" + + yield FunctionInfo.from_fn( + fn=_generate_and_execute_code, + input_schema=CodeGenerationInputSchema, + description="""Generate and execute Python code based on complete instructions. + Accepts comprehensive instructions including context, data information, and requirements in a single parameter. + Returns a summary with execution status, generated files, and output details. + Specializes in data analysis, visualization, and file processing tasks. 
+ Include all necessary context, data file information, and requirements in the instructions parameter.""") + + if config.verbose: + logger.info("Code generation assistant initialized successfully") + + +def _clean_generated_code(raw_code: str) -> str: + """ + Clean generated code by removing markdown formatting and explanatory text. + + Args: + raw_code: Raw code string from LLM response + + Returns: + Cleaned code string with only executable code + """ + code = raw_code.strip() + + # Remove markdown code blocks if present + if code.startswith("```python"): + code = code[9:] # Remove ```python + elif code.startswith("```"): + code = code[3:] # Remove ``` + + if code.endswith("```"): + code = code[:-3] # Remove closing ``` + + code = code.strip() + + # Remove any explanatory text that might appear after the code + # Look for common patterns that indicate explanatory text + explanatory_patterns = [ + "\nThis script performs", + "\nThis code performs", + "\nThe script does", + "\nThe code does", + "\nExplanation:", + "\nSummary:", + "\nThe above code", + "\nThis will", + "\nThe generated code" + ] + + for pattern in explanatory_patterns: + if pattern in code: + code = code.split(pattern)[0].strip() + break + + # Also remove any line that starts with explaining the script + lines = code.split('\n') + clean_lines = [] + + for line in lines: + stripped_line = line.strip() + # Skip lines that look like explanations + if (stripped_line.startswith('This script') or + stripped_line.startswith('This code') or + stripped_line.startswith('The script') or + stripped_line.startswith('The code') or + stripped_line.startswith('Explanation:') or + (stripped_line and not any(char in stripped_line for char in ['=', '(', ')', '[', ']', '{', '}', 'import', 'from', 'def', 'class', 'if', 'for', 'while', 'try', 'except', 'with', '#']))): + continue + clean_lines.append(line) + + return '\n'.join(clean_lines).strip() + + +def _extract_file_paths(stdout: str, output_folder: str) -> list: + """Extract generated file paths from execution output.""" + import re + import os + + files = [] + # Look for common patterns indicating file generation + patterns = [ + r'saved to[:\s]+([^\s\n]+\.(?:html|png|jpg|jpeg|pdf|csv|json))', + r'([^\s\n]+\.(?:html|png|jpg|jpeg|pdf|csv|json))', + r'Plot saved to[:\s]+([^\s\n]+)', + r'File saved[:\s]+([^\s\n]+)' + ] + + for pattern in patterns: + matches = re.findall(pattern, stdout, re.IGNORECASE) + for match in matches: + file_path = match.strip().strip('"\'') + if file_path and not file_path.startswith('#'): + # Convert relative paths to absolute if needed + if not os.path.isabs(file_path): + full_path = os.path.join(output_folder, file_path.lstrip('./')) + else: + full_path = file_path + files.append(full_path) + + return list(set(files)) # Remove duplicates \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_anomaly_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_anomaly_tool.py new file mode 100644 index 00000000..2d3cd5f1 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_anomaly_tool.py @@ -0,0 +1,128 @@ +import json +import logging +import os +import pandas as pd +from typing import Optional +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import 
register_function +from aiq.data_models.function import FunctionBaseConfig + +from .plot_utils import create_anomaly_plot_from_data + +logger = logging.getLogger(__name__) + + +class PlotAnomalyToolConfig(FunctionBaseConfig, name="plot_anomaly_tool"): + """ + AIQ Toolkit function to create anomaly detection visualizations. + """ + output_folder: str = Field(description="The path to the output folder to save plots.", default="./output_data") + + +@register_function(config_type=PlotAnomalyToolConfig) +async def plot_anomaly_tool(config: PlotAnomalyToolConfig, builder: Builder): + + class PlotAnomalyInputSchema(BaseModel): + anomaly_data_json_path: str = Field(description="Path to JSON file containing sensor data with is_anomaly column") + sensor_name: str = Field(description="Name of the sensor to plot", default="sensor_measurement_1") + engine_unit: int = Field(description="Engine unit number", default=5) + plot_title: Optional[str] = Field(description="Custom title for the plot", default=None) + + def load_json_data(json_path: str) -> Optional[pd.DataFrame]: + """Load data from JSON file.""" + try: + with open(json_path, 'r') as f: + data = json.load(f) + return pd.DataFrame(data) + except Exception as e: + logger.error(f"Error loading JSON data from {json_path}: {e}") + return None + + # Plotting logic moved to plot_utils.py for thread safety + + async def _response_fn( + anomaly_data_json_path: str, + sensor_name: str = "sensor_measurement_1", + engine_unit: int = 5, + plot_title: Optional[str] = None + ) -> str: + """ + Create anomaly detection visualization from sensor data with is_anomaly column. + """ + try: + # Load the data with anomaly information + data_df = load_json_data(anomaly_data_json_path) + if data_df is None: + return f"Failed to load anomaly data from {anomaly_data_json_path}" + + logger.info(f"Loaded anomaly data: {data_df.shape}") + + # Create the plot using thread-safe utility function + html_filepath, png_filepath = create_anomaly_plot_from_data( + data_df, sensor_name, engine_unit, + config.output_folder, plot_title + ) + + if html_filepath is None: + return "Failed to create anomaly visualization plot" + + # Build response + response_parts = [ + "ANOMALY DETECTION VISUALIZATION COMPLETED SUCCESSFULLY", + "", + f"Plot Details:", + f" • Sensor: {sensor_name}", + f" • Engine Unit: {engine_unit}", + f" • Data Points: {len(data_df)}", + f" • Anomalous Points: {len(data_df[data_df['is_anomaly'] == True])}", + "", + f"Output Files:", + f" • Interactive HTML: {html_filepath}", + f" • PNG Image: {png_filepath if png_filepath else 'Not generated'}", + "", + f"Visualization Features:", + f" • Blue line shows observed sensor readings", + f" • Red markers highlight detected anomalies", + f" • Interactive plot with zoom and hover capabilities", + "", + "ANOMALY PLOT GENERATION COMPLETE" + ] + + return "\n".join(response_parts) + + except Exception as e: + logger.error(f"Error in plot_anomaly_tool: {e}") + return f"Error creating anomaly plot: {str(e)}" + + description = """ + Create interactive anomaly detection visualizations from sensor data with is_anomaly column. + + This tool takes a single JSON file containing sensor data with an added 'is_anomaly' boolean column + (typically output from MOMENT anomaly detection tool) and creates a clean visualization. + + Features: + - Interactive HTML plot with zoom and hover capabilities + - Blue line for observed sensor readings + - Red markers for detected anomalies + - Automatic time axis detection (cycle, time_in_cycles, etc.) 
+ - PNG export for reports and documentation + - Customizable plot titles + + Input: + - anomaly_data_json_path: Path to JSON file with sensor data and is_anomaly column [REQUIRED] + - sensor_name: Name of sensor column to plot (default: "sensor_measurement_1") + - engine_unit: Engine unit number for labeling (default: 5) + - plot_title: Custom title for the plot (optional) + + Output: + - Interactive HTML visualization file + - PNG image file (if successfully generated) + - Summary of plot generation with file paths + """ + + yield FunctionInfo.from_fn(_response_fn, + input_schema=PlotAnomalyInputSchema, + description=description) \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_comparison_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_comparison_tool.py new file mode 100644 index 00000000..9bf406d9 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_comparison_tool.py @@ -0,0 +1,154 @@ +import json +import logging +import os +import pandas as pd + +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig + +logger = logging.getLogger(__name__) + +def verify_json_path(file_path: str) -> str: + """ + Verify that the input is a valid path to a JSON file. + + Args: + file_path (str): Path to verify + + Returns: + str: Verified file path + + Raises: + ValueError: If input is not a string or not a JSON file + FileNotFoundError: If file does not exist + json.JSONDecodeError: If file contains invalid JSON + """ + if not isinstance(file_path, str): + raise ValueError("Input must be a string path to a JSON file") + + if not file_path.lower().endswith('.json'): + raise ValueError("Input must be a path to a JSON file (ending with .json)") + + if not os.path.exists(file_path): + raise FileNotFoundError(f"JSON file not found at path: {file_path}") + + try: + with open(file_path, 'r') as f: + json.load(f) # Verify file contains valid JSON + except json.JSONDecodeError: + raise ValueError(f"File at {file_path} does not contain valid JSON data") + + return file_path + +class PlotComparisonToolConfig(FunctionBaseConfig, name="plot_comparison_tool"): + """ + AIQ Toolkit function to plot comparison of two y-axis columns against an x-axis column. 
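+ Produces an interactive HTML file and a static PNG export for each call.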
+ """ + output_folder: str = Field(description="The path to the output folder to save plots.", default="./output_data") + +@register_function(config_type=PlotComparisonToolConfig) +async def plot_comparison_tool( + config: PlotComparisonToolConfig, builder: Builder +): + class PlotComparisonInputSchema(BaseModel): + data_json_path: str = Field(description="The path to the JSON file containing the data") + x_axis_column: str = Field(description="The column name for x-axis data", default="time_in_cycles") + y_axis_column_1: str = Field(description="The first column name for y-axis data", default="actual_RUL") + y_axis_column_2: str = Field(description="The second column name for y-axis data", default="predicted_RUL") + plot_title: str = Field(description="The title for the plot", default="Comparison Plot") + + from .plot_utils import create_comparison_plot, load_data_from_json + + async def _response_fn(data_json_path: str, x_axis_column: str, y_axis_column_1: str, y_axis_column_2: str, plot_title: str) -> str: + """ + Process the input message and generate comparison plot. + """ + try: + # Load data to validate columns exist + df = load_data_from_json(data_json_path) + if df is None or df.empty: + return "Could not load data or data is empty from the provided JSON file" + + # Check required columns + required_columns = [x_axis_column, y_axis_column_1, y_axis_column_2] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + return f"Data from {data_json_path} must contain columns: {required_columns}. Missing: {missing_columns}" + + # Use utility function to create plot + html_filepath, png_filepath = create_comparison_plot( + output_dir=config.output_folder, + data_json_path=data_json_path, + x_col=x_axis_column, + y_col_1=y_axis_column_1, + y_col_2=y_axis_column_2, + title=plot_title + ) + + # Convert absolute path to file:// URL for proper browser handling + html_file_url = f"file://{html_filepath}" + + # Build file information for response + file_info = f"- HTML File: {html_filepath}\n- HTML URL: {html_file_url}" + if png_filepath: + file_info += f"\n- PNG File: {png_filepath}" + + # Return a clear completion message that the LLM will understand + return f"""TASK COMPLETED SUCCESSFULLY + +Comparison plot has been generated and saved in multiple formats. + +Chart Details: +- Type: Comparison plot with two lines (Plotly) +- X-axis: {x_axis_column} +- Y-axis Line 1: {y_axis_column_1} (dashed teal) +- Y-axis Line 2: {y_axis_column_2} (solid green) +- Title: {plot_title} +{file_info} + +✅ CHART GENERATION COMPLETE - NO FURTHER ACTION NEEDED""" + + except FileNotFoundError as e: + error_msg = f"Required data file ('{data_json_path}') not found for comparison plot: {e}" + logger.error(error_msg) + return error_msg + except KeyError as ke: + error_msg = f"Missing required columns in '{data_json_path}' for comparison plot: {ke}" + logger.error(error_msg) + return error_msg + except ValueError as ve: + error_msg = f"Data validation error for comparison plot: {ve}" + logger.error(error_msg) + return error_msg + except Exception as e: + error_msg = f"Error generating comparison plot: {e}" + logger.error(error_msg) + return error_msg + + prompt = """ + Generate interactive comparison plot between two columns from JSON data using Plotly. 
+ + Input: + - data_json_path: Path to the JSON file containing the data + - x_axis_column: Column name for x-axis data + - y_axis_column_1: Column name for first y-axis data + - y_axis_column_2: Column name for second y-axis data + - plot_title: Title for the plot + + Output: + - HTML file containing the interactive comparison plot + - PNG file containing the static comparison plot + """ + yield FunctionInfo.from_fn(_response_fn, + input_schema=PlotComparisonInputSchema, + description=prompt) + try: + pass + except GeneratorExit: + logger.info("Plot comparison function exited early!") + finally: + logger.info("Cleaning up plot_comparison_tool workflow.") diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_distribution_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_distribution_tool.py new file mode 100644 index 00000000..6fcfa28a --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_distribution_tool.py @@ -0,0 +1,138 @@ +import json +import logging +import os + +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig + +logger = logging.getLogger(__name__) + +def verify_json_path(file_path: str) -> str: + """ + Verify that the input is a valid path to a JSON file. + + Args: + file_path (str): Path to verify + + Returns: + str: Verified file path + + Raises: + ValueError: If input is not a string or not a JSON file + FileNotFoundError: If file does not exist + json.JSONDecodeError: If file contains invalid JSON + """ + if not isinstance(file_path, str): + raise ValueError("Input must be a string path to a JSON file") + + if not file_path.lower().endswith('.json'): + raise ValueError("Input must be a path to a JSON file (ending with .json)") + + if not os.path.exists(file_path): + raise FileNotFoundError(f"JSON file not found at path: {file_path}") + + try: + with open(file_path, 'r') as f: + json.load(f) # Verify file contains valid JSON + except json.JSONDecodeError: + raise ValueError(f"File at {file_path} does not contain valid JSON data") + + return file_path + +class PlotDistributionToolConfig(FunctionBaseConfig, name="plot_distribution_tool"): + """ + AIQ Toolkit function to plot distribution histogram of a specified column. + """ + output_folder: str = Field(description="The path to the output folder to save plots.", default="./output_data") + +@register_function(config_type=PlotDistributionToolConfig) +async def plot_distribution_tool( + config: PlotDistributionToolConfig, builder: Builder +): + class PlotDistributionInputSchema(BaseModel): + data_json_path: str = Field(description="The path to the JSON file containing the data") + column_name: str = Field(description="The column name to create distribution plot for", default="RUL") + plot_title: str = Field(description="The title for the plot", default="Distribution Plot") + + from .plot_utils import create_distribution_plot, load_data_from_json + + async def _response_fn(data_json_path: str, column_name: str, plot_title: str) -> str: + """ + Process the input message and generate distribution histogram file. 
+ """ + data_json_path = verify_json_path(data_json_path) + try: + # Load data to validate column exists + df = load_data_from_json(data_json_path) + if df is None or df.empty: + return "Could not load data or data is empty from the provided JSON file" + + if column_name not in df.columns: + return f"Column '{column_name}' not found in data. Available columns: {df.columns.tolist()}" + + # Use utility function to create plot + html_filepath, png_filepath = create_distribution_plot( + output_dir=config.output_folder, + data_json_path=data_json_path, + column_name=column_name, + title=plot_title + ) + + # Convert absolute path to file:// URL for proper browser handling + html_file_url = f"file://{html_filepath}" + + # Build file information for response + file_info = f"- HTML File: {html_filepath}\n- HTML URL: {html_file_url}" + if png_filepath: + file_info += f"\n- PNG File: {png_filepath}" + + # Return a clear completion message that the LLM will understand + return f"""TASK COMPLETED SUCCESSFULLY + +Distribution histogram has been generated and saved in multiple formats. + +Chart Details: +- Type: Distribution histogram (30 bins, Plotly) +- Column: {column_name} +- Title: {plot_title} +{file_info} + +✅ CHART GENERATION COMPLETE - NO FURTHER ACTION NEEDED""" + + except FileNotFoundError as e: + error_msg = f"Required data file ('{data_json_path}') not found for distribution plot: {e}" + logger.error(error_msg) + return error_msg + except KeyError as ke: + error_msg = f"Missing expected column '{column_name}' in '{data_json_path}' for distribution plot: {ke}" + logger.error(error_msg) + return error_msg + except Exception as e: + error_msg = f"Error generating distribution histogram: {e}" + logger.error(error_msg) + return error_msg + + prompt = """ + Generate interactive distribution histogram from JSON data using Plotly. + Input: + - data_json_path: Path to the JSON file containing the data + - column_name: Column name for the distribution histogram + - plot_title: Title for the plot + + Output: + - HTML file containing the interactive distribution histogram + - PNG file containing the static distribution histogram + """ + yield FunctionInfo.from_fn(_response_fn, + input_schema=PlotDistributionInputSchema, + description=prompt) + try: + pass + except GeneratorExit: + logger.info("Plot distribution function exited early!") + finally: + logger.info("Cleaning up plot_distribution_tool workflow.") diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_line_chart_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_line_chart_tool.py new file mode 100644 index 00000000..983c2db1 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_line_chart_tool.py @@ -0,0 +1,151 @@ +import json +import logging +import os + +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig + +logger = logging.getLogger(__name__) + +def verify_json_path(file_path: str) -> str: + """ + Verify that the input is a valid path to a JSON file. 
+ + Args: + file_path (str): Path to verify + + Returns: + str: Verified file path + + Raises: + ValueError: If input is not a string or not a JSON file + FileNotFoundError: If file does not exist + json.JSONDecodeError: If file contains invalid JSON + """ + if not isinstance(file_path, str): + raise ValueError("Input must be a string path to a JSON file") + + if not file_path.lower().endswith('.json'): + raise ValueError("Input must be a path to a JSON file (ending with .json)") + + if not os.path.exists(file_path): + raise FileNotFoundError(f"JSON file not found at path: {file_path}") + + try: + with open(file_path, 'r') as f: + json.load(f) # Verify file contains valid JSON + except json.JSONDecodeError: + raise ValueError(f"File at {file_path} does not contain valid JSON data") + + return file_path + +class PlotLineChartToolConfig(FunctionBaseConfig, name="plot_line_chart_tool"): + """ + AIQ Toolkit function to plot a line chart with specified x and y axis columns. + """ + output_folder: str = Field(description="The path to the output folder to save plots.", default="./output_data") + +@register_function(config_type=PlotLineChartToolConfig) +async def plot_line_chart_tool( + config: PlotLineChartToolConfig, builder: Builder +): + class PlotLineChartInputSchema(BaseModel): + data_json_path: str = Field(description="The path to the JSON file containing the data") + x_axis_column: str = Field(description="The column name for x-axis data", default="time_in_cycles") + y_axis_column: str = Field(description="The column name for y-axis data", default="RUL") + plot_title: str = Field(description="The title for the plot", default="Line Chart") + + from .plot_utils import create_line_chart, load_data_from_json + + async def _response_fn(data_json_path: str, x_axis_column: str, y_axis_column: str, plot_title: str) -> str: + """ + Process the input message and generate line chart. + """ + data_json_path = verify_json_path(data_json_path) + + try: + # Load data to validate columns exist + df = load_data_from_json(data_json_path) + if df is None or df.empty: + return "Could not load data or data is empty from the provided JSON file" + + # Check required columns + required_columns = [x_axis_column, y_axis_column] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + return f"Data from {data_json_path} must contain columns: {required_columns}. Missing: {missing_columns}" + + # Use utility function to create plot + html_filepath, png_filepath = create_line_chart( + output_dir=config.output_folder, + data_json_path=data_json_path, + x_col=x_axis_column, + y_col=y_axis_column, + title=plot_title + ) + + # Convert absolute path to file:// URL for proper browser handling + html_file_url = f"file://{html_filepath}" + + # Build file information for response + file_info = f"- HTML File: {html_filepath}\n- HTML URL: {html_file_url}" + if png_filepath: + file_info += f"\n- PNG File: {png_filepath}" + + # Return a clear completion message that the LLM will understand + return f"""TASK COMPLETED SUCCESSFULLY + +Line chart has been generated and saved in multiple formats. 
+ +Chart Details: +- Type: Line chart with markers (Plotly) +- X-axis: {x_axis_column} +- Y-axis: {y_axis_column} +- Title: {plot_title} +{file_info} + +✅ CHART GENERATION COMPLETE - NO FURTHER ACTION NEEDED""" + + except FileNotFoundError as e: + error_msg = f"Required data file ('{data_json_path}') not found for line chart: {e}" + logger.error(error_msg) + return error_msg + except KeyError as ke: + error_msg = f"Missing required columns in '{data_json_path}' for line chart: {ke}" + logger.error(error_msg) + return error_msg + except ValueError as ve: + error_msg = f"Data validation error for line chart: {ve}" + logger.error(error_msg) + return error_msg + except Exception as e: + error_msg = f"Error generating line chart: {e}" + logger.error(error_msg) + return error_msg + + prompt = """ + Generate interactive line chart from JSON data using Plotly. + + Input: + - data_json_path: Path to the JSON file containing the data + - x_axis_column: Column name for x-axis data + - y_axis_column: Column name for y-axis data + - plot_title: Title for the plot + + Output: + - HTML file containing the interactive line chart + - PNG file containing the static line chart + """ + yield FunctionInfo.from_fn(_response_fn, + input_schema=PlotLineChartInputSchema, + description=prompt) + try: + pass + except GeneratorExit: + logger.info("Plot line chart function exited early!") + finally: + logger.info("Cleaning up plot_line_chart_tool workflow.") diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_utils.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_utils.py new file mode 100644 index 00000000..d72ca204 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/plotting/plot_utils.py @@ -0,0 +1,564 @@ +import json +import logging +import os +import pandas as pd +from typing import Optional, Tuple + +logger = logging.getLogger(__name__) + +def load_data_from_json(json_path: str) -> Optional[pd.DataFrame]: + """Load data from JSON file into a pandas DataFrame.""" + try: + with open(json_path, 'r') as f: + data = json.load(f) + return pd.DataFrame(data) + except FileNotFoundError: + logger.error(f"JSON file not found at {json_path}") + return None + except json.JSONDecodeError: + logger.error(f"Could not decode JSON from {json_path}") + return None + except Exception as e: + logger.error(f"Error loading data from '{json_path}': {e}") + return None + +def save_plotly_as_png(fig, filepath: str, width: int = 650, height: int = 450) -> bool: + """ + Save plotly figure as PNG using matplotlib backend. 
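+ Only scatter and histogram traces are converted; other trace types are skipped.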
+ + Returns: + bool: True if successful, False otherwise + """ + try: + import matplotlib.pyplot as plt + import matplotlib + matplotlib.use('Agg') # Non-interactive backend + + # Create matplotlib figure + fig_mpl, ax = plt.subplots(figsize=(width/100, height/100)) + + # Plot each trace with simplified approach + for i, trace in enumerate(fig.data): + if trace.type == 'scatter': + # Handle line properties + line_style = '-' + color = '#1f77b4' # default color + + # Extract line properties safely + if hasattr(trace, 'line') and trace.line: + if hasattr(trace.line, 'dash') and trace.line.dash == 'dash': + line_style = '--' + if hasattr(trace.line, 'color') and trace.line.color: + color = trace.line.color + + # Extract marker color (takes precedence for better Plotly color preservation) + if hasattr(trace, 'marker') and trace.marker and hasattr(trace.marker, 'color') and trace.marker.color: + color = trace.marker.color + + # Extract name safely + name = trace.name if hasattr(trace, 'name') and trace.name else f'Trace {i+1}' + + # Plot based on mode + mode = getattr(trace, 'mode', 'lines') + if 'markers' in mode: + if mode == 'markers': + # Only markers, no lines + ax.plot(trace.x, trace.y, 'o', + color=color, label=name, markersize=6) + else: + # Both markers and lines + ax.plot(trace.x, trace.y, 'o-', + linestyle=line_style, color=color, + label=name, linewidth=2, markersize=4) + else: + ax.plot(trace.x, trace.y, linestyle=line_style, + color=color, label=name, linewidth=2) + + elif trace.type == 'histogram': + # Handle histogram properties + color = '#e17160' # default color + if hasattr(trace, 'marker') and trace.marker and hasattr(trace.marker, 'color'): + color = trace.marker.color + + name = trace.name if hasattr(trace, 'name') and trace.name else f'Histogram {i+1}' + ax.hist(trace.x, bins=30, alpha=0.8, color=color, + edgecolor='white', linewidth=0.5, label=name) + + # Apply layout safely + layout = fig.layout + if hasattr(layout, 'title') and layout.title and hasattr(layout.title, 'text') and layout.title.text: + ax.set_title(layout.title.text) + if hasattr(layout, 'xaxis') and layout.xaxis and hasattr(layout.xaxis, 'title') and layout.xaxis.title and hasattr(layout.xaxis.title, 'text'): + ax.set_xlabel(layout.xaxis.title.text) + if hasattr(layout, 'yaxis') and layout.yaxis and hasattr(layout.yaxis, 'title') and layout.yaxis.title and hasattr(layout.yaxis.title, 'text'): + ax.set_ylabel(layout.yaxis.title.text) + + # Show legend if there are multiple traces or if any trace has a name + if len(fig.data) > 1 or (len(fig.data) == 1 and hasattr(fig.data[0], 'name') and fig.data[0].name): + ax.legend() + + ax.grid(True, alpha=0.3) + plt.tight_layout() + plt.savefig(filepath, dpi=150, bbox_inches='tight') + plt.close() + + logger.info(f"PNG saved using matplotlib: {filepath}") + return True + + except Exception as e: + logger.error(f"Matplotlib PNG generation failed: {e}") + return False + +def create_comparison_plot(output_dir: str, data_json_path: str, x_col: str, + y_col_1: str, y_col_2: str, title: str) -> Tuple[str, Optional[str]]: + """ + Generate comparison plot in both HTML and PNG formats. 
+ + Returns: + Tuple[str, Optional[str]]: (html_filepath, png_filepath) + """ + import plotly.graph_objects as go + import plotly.offline as pyo + + df = load_data_from_json(data_json_path) + if df is None or df.empty: + raise ValueError(f"Could not load data or data is empty from {data_json_path}") + + # Check required columns + required_columns = [x_col, y_col_1, y_col_2] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + raise KeyError(f"Data from {data_json_path} must contain columns: {required_columns}. Missing: {missing_columns}") + + # Sort by x-axis column for proper line plotting + df_sorted = df.sort_values(x_col) + + # Create the comparison plot + fig = go.Figure() + + # Add first line (dashed) + fig.add_trace(go.Scatter( + x=df_sorted[x_col], + y=df_sorted[y_col_1], + mode='lines', + name=y_col_1, + line=dict(color='#20B2AA', width=3, dash='dash'), + hovertemplate=f'{x_col}: %{{x}}
<br>' + + f'{y_col_1}: %{{y:.1f}}<br>
' + + '<extra></extra>' + )) + + # Add second line (solid) + fig.add_trace(go.Scatter( + x=df_sorted[x_col], + y=df_sorted[y_col_2], + mode='lines', + name=y_col_2, + line=dict(color='#2E8B57', width=3), + hovertemplate=f'{x_col}: %{{x}}<br>
' + + f'{y_col_2}: %{{y:.1f}}<br>
' + + '<extra></extra>' + )) + + # Update layout + fig.update_layout( + title=dict(text=title, x=0.5, font=dict(size=16)), + xaxis=dict(title=dict(text=x_col, font=dict(size=14)), gridcolor='lightgray', gridwidth=0.5), + yaxis=dict(title=dict(text='Value', font=dict(size=14)), gridcolor='lightgray', gridwidth=0.5), + width=800, height=450, plot_bgcolor='white', + legend=dict(x=1, y=0, xanchor='right', yanchor='bottom', + bgcolor='rgba(255,255,255,0.8)', bordercolor='gray', borderwidth=1), + hovermode='closest' + ) + + # Set y-axis range + y_min = min(df_sorted[y_col_1].min(), df_sorted[y_col_2].min()) + y_max = max(df_sorted[y_col_1].max(), df_sorted[y_col_2].max()) + y_range = y_max - y_min + fig.update_yaxes(range=[max(0, y_min - y_range * 0.05), y_max + y_range * 0.05]) + + # Save files + os.makedirs(output_dir, exist_ok=True) + + # HTML file + html_filepath = os.path.join(output_dir, f"comparison_plot_{y_col_1}_vs_{y_col_2}.html") + html_content = pyo.plot(fig, output_type='div', include_plotlyjs=True) + full_html = f"""<!DOCTYPE html> + <html> + <head> + <title>{title}</title> + </head> + <body> + {html_content} + </body> + </html> + """ + + with open(html_filepath, 'w', encoding='utf-8') as f: + f.write(full_html) + logger.info(f"Comparison plot HTML saved: {html_filepath}") + + # PNG file + png_filepath = os.path.join(output_dir, f"comparison_plot_{y_col_1}_vs_{y_col_2}.png") + png_success = save_plotly_as_png(fig, png_filepath, width=800, height=450) + + return html_filepath, png_filepath if png_success else None + +def create_line_chart(output_dir: str, data_json_path: str, x_col: str, + y_col: str, title: str) -> Tuple[str, Optional[str]]: + """ + Generate line chart in both HTML and PNG formats. + + Returns: + Tuple[str, Optional[str]]: (html_filepath, png_filepath) + """ + import plotly.graph_objects as go + import plotly.offline as pyo + + df = load_data_from_json(data_json_path) + if df is None or df.empty: + raise ValueError(f"Could not load data or data is empty from {data_json_path}") + + # Check required columns + required_columns = [x_col, y_col] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + raise KeyError(f"Data from {data_json_path} must contain columns: {required_columns}. Missing: {missing_columns}") + + # Sort by x-axis column + df_sorted = df.sort_values(x_col) + + # Create line chart + fig = go.Figure() + fig.add_trace(go.Scatter( + x=df_sorted[x_col], + y=df_sorted[y_col], + mode='lines+markers', + name=y_col, + line=dict(color='#1f77b4', width=3), + marker=dict(size=6, color='#1f77b4'), + hovertemplate=f'{x_col}: %{{x}}<br>
' + + f'{y_col}: %{{y:.2f}}<br>
' + + '<extra></extra>' + )) + + # Update layout + fig.update_layout( + title=dict(text=title, x=0.5, font=dict(size=16)), + xaxis=dict(title=dict(text=x_col, font=dict(size=14)), gridcolor='lightgray', gridwidth=0.5), + yaxis=dict(title=dict(text=y_col, font=dict(size=14)), gridcolor='lightgray', gridwidth=0.5), + width=650, height=450, plot_bgcolor='white', showlegend=False, hovermode='closest' + ) + + # Set y-axis range + y_min = df_sorted[y_col].min() + y_max = df_sorted[y_col].max() + y_range = y_max - y_min + if y_range > 0: + fig.update_yaxes(range=[y_min - y_range * 0.05, y_max + y_range * 0.05]) + + # Save files + os.makedirs(output_dir, exist_ok=True) + + # HTML file + html_filepath = os.path.join(output_dir, f"line_chart_{x_col}_vs_{y_col}.html") + html_content = pyo.plot(fig, output_type='div', include_plotlyjs=True) + full_html = f"""<!DOCTYPE html> + <html> + <head> + <title>{title}</title> + </head> + <body> + {html_content} + </body> + </html> + """ + + with open(html_filepath, 'w', encoding='utf-8') as f: + f.write(full_html) + logger.info(f"Line chart HTML saved: {html_filepath}") + + # PNG file + png_filepath = os.path.join(output_dir, f"line_chart_{x_col}_vs_{y_col}.png") + png_success = save_plotly_as_png(fig, png_filepath, width=650, height=450) + + return html_filepath, png_filepath if png_success else None + +def create_distribution_plot(output_dir: str, data_json_path: str, column_name: str, + title: str) -> Tuple[str, Optional[str]]: + """ + Generate distribution histogram in both HTML and PNG formats. + + Returns: + Tuple[str, Optional[str]]: (html_filepath, png_filepath) + """ + import plotly.graph_objects as go + import plotly.offline as pyo + + df = load_data_from_json(data_json_path) + if df is None or df.empty: + raise ValueError(f"Could not load data or data is empty from {data_json_path}") + + if column_name not in df.columns: + raise KeyError(f"Data from {data_json_path} must contain '{column_name}' column. Found: {df.columns.tolist()}") + + # Create histogram + fig = go.Figure() + fig.add_trace(go.Histogram( + x=df[column_name], + nbinsx=30, + name=column_name, + marker=dict(color='#e17160', line=dict(color='white', width=1)), + opacity=0.8, + hovertemplate='Range: %{x}<br>
' + + 'Count: %{y}<br>
' + + '<extra></extra>' + )) + + # Update layout + fig.update_layout( + title=dict(text=title, x=0.5, font=dict(size=14)), + xaxis=dict(title=dict(text=column_name, font=dict(size=12)), gridcolor='lightgray', gridwidth=0.5), + yaxis=dict(title=dict(text='Frequency', font=dict(size=12)), gridcolor='lightgray', gridwidth=0.5), + width=650, height=450, plot_bgcolor='white', showlegend=False, hovermode='closest' + ) + + # Save files + os.makedirs(output_dir, exist_ok=True) + + # HTML file + html_filepath = os.path.join(output_dir, f"distribution_plot_{column_name}.html") + html_content = pyo.plot(fig, output_type='div', include_plotlyjs=True) + full_html = f"""<!DOCTYPE html> + <html> + <head> + <title>{title}</title> + </head> + <body> + {html_content} + </body> + </html> + """ + + with open(html_filepath, 'w', encoding='utf-8') as f: + f.write(full_html) + logger.info(f"Distribution plot HTML saved: {html_filepath}") + + # PNG file + png_filepath = os.path.join(output_dir, f"distribution_plot_{column_name}.png") + png_success = save_plotly_as_png(fig, png_filepath, width=650, height=450) + + return html_filepath, png_filepath if png_success else None + + +def create_moment_anomaly_visualization(df: pd.DataFrame, anomaly_indices, + anomaly_scores, sensor_name: str, + output_dir: str, engine_unit: int, dataset_name: str) -> Tuple[Optional[str], Optional[str]]: + """Create interactive plot for MOMENT-based anomaly detection results for a single sensor.""" + try: + import plotly.graph_objects as go + import numpy as np + + if sensor_name not in df.columns: + raise ValueError(f"Sensor '{sensor_name}' not found in data. Available sensors: {df.columns.tolist()}") + + # Create a simple single plot + fig = go.Figure() + + # Create x-axis (check for various time column names) + time_columns = ['time_in_cycles', 'cycle', 'time', 'timestamp'] + x_axis = None + x_title = "Index" + + for col in time_columns: + if col in df.columns: + x_axis = df[col] + x_title = col.replace('_', ' ').title() + break + + if x_axis is None: + # Wrap the index in a Series so positional .iloc lookups below work + x_axis = pd.Series(df.index) + x_title = "Index" + + # Plot all sensor readings as blue line (Observed) + fig.add_trace( + go.Scatter( + x=x_axis, + y=df[sensor_name], + mode='lines', + name='Observed', + line=dict(color='blue', width=2), + opacity=0.8 + ) + ) + + # Plot anomalous points as red markers + if len(anomaly_indices) > 0 and np.any(anomaly_indices): + # Find where anomalies are True + anomaly_positions = np.where(anomaly_indices)[0] + + # Make sure we don't go beyond the dataframe length + valid_positions = anomaly_positions[anomaly_positions < len(df)] + + if len(valid_positions) > 0: + anomaly_x = x_axis.iloc[valid_positions] + anomaly_y = df[sensor_name].iloc[valid_positions] + + fig.add_trace( + go.Scatter( + x=anomaly_x, + y=anomaly_y, + mode='markers', + name='Anomaly', + marker=dict(color='red', size=6, symbol='circle'), + opacity=0.9 + ) + ) + + fig.update_layout( + title=f'MOMENT Anomaly Detection - {sensor_name} (Engine {engine_unit})', + xaxis_title=x_title, + yaxis_title=f"{sensor_name}", + height=400, + showlegend=True, + font=dict(size=12), + template="plotly_white" + ) + + # Save as HTML + os.makedirs(output_dir, exist_ok=True) + html_filename = f"moment_anomaly_detection_{sensor_name}_engine{engine_unit}.html" + html_filepath = os.path.join(output_dir, html_filename) + fig.write_html(html_filepath) + + # Save as PNG using the safe function from plot_utils + png_filename = f"moment_anomaly_detection_{sensor_name}_engine{engine_unit}.png" + png_filepath = os.path.join(output_dir, png_filename) + png_success = save_plotly_as_png(fig, png_filepath, width=1200, height=400) + + 
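# A failed PNG export is logged but not fatal; the HTML plot is still returned + 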
logger.info(f"MOMENT anomaly visualization saved: HTML={html_filepath}, PNG={'Success' if png_success else 'Failed'}") + + return html_filepath, png_filepath if png_success else None + + except ImportError: + logger.error("Plotly not available for visualization") + return None, None + except Exception as e: + logger.error(f"Error creating MOMENT anomaly visualization: {e}") + return None, None + + +def create_anomaly_plot_from_data(data_df: pd.DataFrame, sensor_name: str, engine_unit: int, + output_dir: str, plot_title: str = None) -> Tuple[str, str]: + """ + Create anomaly detection visualization plot from sensor data with is_anomaly column. + + Args: + data_df: DataFrame containing sensor data with 'is_anomaly' boolean column + sensor_name: Name of the sensor column to plot + engine_unit: Engine unit number for labeling + output_dir: Directory to save plot files + plot_title: Custom title for the plot (optional) + + Returns: + Tuple of (html_filepath, png_filepath) + """ + try: + import plotly.graph_objects as go + import numpy as np + + if sensor_name not in data_df.columns: + raise ValueError(f"Sensor '{sensor_name}' not found in data. Available sensors: {data_df.columns.tolist()}") + + if 'is_anomaly' not in data_df.columns: + raise ValueError("'is_anomaly' column not found in data. Make sure to use output from MOMENT anomaly detection tool.") + + # Create figure + fig = go.Figure() + + # Determine time axis (check for various time column names) + time_columns = ['time_in_cycles', 'cycle', 'time', 'timestamp'] + x_axis = None + x_title = "Index" + + for col in time_columns: + if col in data_df.columns: + x_axis = data_df[col] + x_title = col.replace('_', ' ').title() + break + + if x_axis is None: + x_axis = data_df.index + x_title = "Index" + + # Plot all sensor readings as blue line (Observed) + fig.add_trace( + go.Scatter( + x=x_axis, + y=data_df[sensor_name], + mode='lines', + name='Observed', + line=dict(color='blue', width=2), + opacity=0.8 + ) + ) + + # Extract anomaly points directly from the is_anomaly column + anomaly_mask = data_df['is_anomaly'] == True + anomaly_indices = data_df[anomaly_mask].index.values + + # Plot anomalous points as red markers + if len(anomaly_indices) > 0: + anomaly_x = x_axis.iloc[anomaly_indices] + anomaly_y = data_df[sensor_name].iloc[anomaly_indices] + + fig.add_trace( + go.Scatter( + x=anomaly_x, + y=anomaly_y, + mode='markers', + name='Anomaly', + marker=dict(color='red', size=8, symbol='circle'), + opacity=0.9 + ) + ) + + fig.update_layout( + title=f'Anomaly Detection - {sensor_name} (Engine {engine_unit})', + xaxis_title=x_title, + yaxis_title=f"{sensor_name}", + height=500, + showlegend=True, + font=dict(size=12), + template="plotly_white" + ) + + # Save files + os.makedirs(output_dir, exist_ok=True) + + # HTML file + html_filename = f"anomaly_plot_{sensor_name}_engine{engine_unit}.html" + html_filepath = os.path.join(output_dir, html_filename) + fig.write_html(html_filepath) + + # PNG file using thread-safe function + png_filename = f"anomaly_plot_{sensor_name}_engine{engine_unit}.png" + png_filepath = os.path.join(output_dir, png_filename) + png_success = save_plotly_as_png(fig, png_filepath, width=1200, height=500) + + logger.info(f"Anomaly plot saved: HTML={html_filepath}, PNG={'Success' if png_success else 'Failed'}") + + return html_filepath, png_filepath if png_success else None + + except ImportError: + logger.error("Plotly not available for visualization") + return None, None + except Exception as e: + logger.error(f"Error creating 
anomaly plot: {e}") + return None, None \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/__init__.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/__init__.py new file mode 100644 index 00000000..5557ad1d --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/__init__.py @@ -0,0 +1,14 @@ +""" +Predictors package for predictive maintenance agent. + +This package contains components for prediction and anomaly detection +in predictive maintenance workflows. +""" + +from . import moment_anomaly_detection_tool +from . import predict_rul_tool + +__all__ = [ + "moment_anomaly_detection_tool", + "predict_rul_tool", +] \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/moment_anomaly_detection_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/moment_anomaly_detection_tool.py new file mode 100644 index 00000000..9e60e5c0 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/moment_anomaly_detection_tool.py @@ -0,0 +1,391 @@ +import json +import logging +import os +import pandas as pd +import numpy as np +from typing import List, Tuple, Optional +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig + +# Note: Visualization is now handled by the separate plot_anomaly_tool + +logger = logging.getLogger(__name__) + +# Global model instance - initialized once when module is loaded +_MOMENT_MODEL: Optional[object] = None +_MODEL_DEVICE: Optional[str] = None + +def _initialize_moment_model(): + """Initialize MOMENT model once and cache it globally.""" + global _MOMENT_MODEL, _MODEL_DEVICE + + if _MOMENT_MODEL is not None: + logger.info("MOMENT model already initialized, reusing cached instance") + return _MOMENT_MODEL, _MODEL_DEVICE + + try: + logger.info("Initializing MOMENT-1-small model (one-time setup)...") + import time + start_time = time.time() + + from momentfm import MOMENTPipeline + import torch + + # Initialize MOMENT pipeline for anomaly detection + model_name = "MOMENT-1-small" + _MOMENT_MODEL = MOMENTPipeline.from_pretrained( + f"AutonLab/{model_name}", + model_kwargs={"task_name": "reconstruction"} + ) + _MOMENT_MODEL.init() + + # Move model to device + _MODEL_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + _MOMENT_MODEL = _MOMENT_MODEL.to(_MODEL_DEVICE).float() + + logger.info(f"MOMENT model initialized and cached in {time.time() - start_time:.2f} seconds on {_MODEL_DEVICE}") + return _MOMENT_MODEL, _MODEL_DEVICE + + except Exception as e: + logger.error(f"Failed to initialize MOMENT model: {e}") + raise RuntimeError(f"MOMENT model initialization failed: {e}") + +# Pre-initialize the model when module is imported (optional - can be lazy loaded) +try: + _initialize_moment_model() + logger.info("MOMENT model pre-loaded successfully") +except Exception as e: + logger.warning(f"MOMENT model pre-loading failed, will initialize on first use: {e}") + _MOMENT_MODEL = None + _MODEL_DEVICE = None + + +class TimeSeriesAnomalyDetectionToolConfig(FunctionBaseConfig, 
name="moment_anomaly_detection_tool"): + """ + AIQ Toolkit function to perform anomaly detection using MOMENT-1-small foundation model. + """ + output_folder: str = Field(description="The path to the output folder to save results.", default="./output_data") + +@register_function(config_type=TimeSeriesAnomalyDetectionToolConfig) +async def moment_anomaly_detection_tool( + config: TimeSeriesAnomalyDetectionToolConfig, builder: Builder +): + class MomentAnomalyDetectionInputSchema(BaseModel): + sensor_data_json_path: str = Field(description="Path to JSON file containing sensor data (from sql_retriever tool)") + engine_unit: int = Field(description="Engine unit number to analyze", default=5) + sensor_name: str = Field(description="Name of the sensor to analyze and plot (e.g., 'sensor_measurement_1', 'sensor_measurement_4')", default="sensor_measurement_1") + + def prepare_time_series_data_for_moment(df: pd.DataFrame, sensor_name: str, max_seq_len: int = 512) -> Tuple[List[np.ndarray], object]: + """Prepare time series data for MOMENT model input. + + MOMENT expects input shape: (batch_size, num_channels, seq_len) + For single sensor analysis: (1, 1, seq_len) where seq_len <= 512 + + Args: + df: DataFrame with sensor data + sensor_name: Name of the sensor column to process + max_seq_len: Maximum sequence length (512 for MOMENT-1-large) + + Returns: + List of sequences with shape (1, 1, seq_len) + """ + try: + # Select single sensor column + sensor_data = df[sensor_name].values + logger.info(f"Original sensor data shape: {sensor_data.shape}") + + # Normalize the data + from sklearn.preprocessing import StandardScaler + scaler = StandardScaler() + normalized_data = scaler.fit_transform(sensor_data.reshape(-1, 1)).flatten() + logger.info(f"Normalized sensor data shape: {normalized_data.shape}") + + # Split data into chunks of max_seq_len (512) + sequences = [] + total_length = len(normalized_data) + + i = 0 + while i < total_length: + chunk = normalized_data[i:i + max_seq_len] + sequence = chunk.reshape(1, 1, -1) + sequences.append(sequence) + i += max_seq_len + + logger.info(f"Created {len(sequences)} sequences, shapes: {[seq.shape for seq in sequences]}") + + return sequences + + except Exception as e: + logger.error(f"Error preparing time series data for MOMENT: {e}") + return None + + def create_moment_dataset(sequences: List[np.ndarray]): + """Create a dataset compatible with MOMENT from sequences.""" + import torch + from torch.utils.data import TensorDataset + + # Combine all sequences into a single tensor + # Each sequence has shape (1, 1, seq_len), we want (num_sequences, 1, seq_len) + data_tensors = [] + masks = [] + labels = [] # We'll use dummy labels since this is unsupervised + + for seq in sequences: + # seq shape: (1, 1, seq_len) -> squeeze to (1, seq_len) + seq_squeezed = seq.squeeze(0) # Remove first dimension: (1, seq_len) + data_tensors.append(torch.FloatTensor(seq_squeezed)) + + # Create mask (all True since we don't have missing values) + mask = torch.ones(seq_squeezed.shape[1], dtype=torch.bool) # seq_len + masks.append(mask) + + # Dummy label (0 for normal, we'll determine anomalies from reconstruction error) + labels.append(torch.tensor(0)) + + # Stack all tensors + data = torch.stack(data_tensors) # (num_sequences, 1, seq_len) + masks = torch.stack(masks) # (num_sequences, seq_len) + labels = torch.stack(labels) # (num_sequences,) + + return TensorDataset(data, masks, labels) + + def detect_anomalies_with_moment(sequences: List[np.ndarray], threshold_percentile: float) -> 
np.ndarray:
+        """Detect anomalies using MOMENT-1-small foundation model following the official tutorial.
+
+        Args:
+            sequences: List of sequences with shape (1, 1, seq_len)
+            threshold_percentile: Percentile for anomaly threshold
+
+        Returns:
+            anomalies: Boolean array marking anomalous timesteps (per-timestep
+            reconstruction-error scores are computed internally to derive it)
+        """
+        logger.info("Starting MOMENT-based anomaly detection...")
+
+        from torch.utils.data import DataLoader
+        from tqdm import tqdm
+        import torch
+
+        # Use pre-initialized global model or initialize if needed
+        model, device = _initialize_moment_model()
+
+        logger.info("Using cached MOMENT-1-small model for anomaly detection")
+        logger.info(f"Number of sequences to process: {len(sequences)}")
+        if sequences:
+            logger.info(f"Each sequence shape: {sequences[0].shape}")
+
+        # Create dataset and dataloader following the tutorial
+        dataset = create_moment_dataset(sequences)
+        dataloader = DataLoader(dataset, batch_size=32, shuffle=False, drop_last=False)
+        logger.info(f"Using device: {device}")
+
+        # Process batches following the tutorial pattern
+        trues, preds, labels = [], [], []
+        with torch.no_grad():
+            for batch_x, batch_masks, batch_labels in tqdm(dataloader, total=len(dataloader), desc="Processing batches"):
+                batch_x = batch_x.to(device).float()
+                batch_masks = batch_masks.to(device)
+
+                # MOMENT forward pass
+                output = model(x_enc=batch_x, input_mask=batch_masks)
+
+                # Collect results
+                trues.append(batch_x.detach().squeeze().cpu().numpy())
+                preds.append(output.reconstruction.detach().squeeze().cpu().numpy())
+                labels.append(batch_labels.detach().cpu().numpy())
+
+        # Concatenate all results following the tutorial
+        trues = np.concatenate(trues, axis=0)
+        preds = np.concatenate(preds, axis=0)
+        labels = np.concatenate(labels, axis=0)
+
+        # Handle sequence length differences between input and MOMENT output
+        original_length = sequences[0].shape[2] if sequences else 0  # Get original input length
+        logger.info(f"Original sequence length: {original_length}, MOMENT output length: {trues.shape[0]}")
+
+        # Handle overlapping windows if we have multiple sequences (following tutorial logic)
+        if len(sequences) > 1:
+            total_length = sum(seq.shape[2] for seq in sequences)  # seq.shape = (1, 1, seq_len)
+            logger.info(f"Total original length: {total_length}, reconstructed length: {trues.shape[0]}")
+
+            # If we have overlapping predictions, handle them
+            if trues.shape[0] > total_length:
+                # Keep first part and unique end part
+                n_unique_timesteps = 512 - trues.shape[0] + total_length
+                trues = np.concatenate([trues[:512*(total_length//512)], trues[-n_unique_timesteps:]])
+                preds = np.concatenate([preds[:512*(total_length//512)], preds[-n_unique_timesteps:]])
+        else:
+            # Single sequence case - flatten if needed
+            if len(trues.shape) > 1:
+                trues = trues.flatten()
+                preds = preds.flatten()
+
+        logger.info(f"Final shapes - trues: {trues.shape}, preds: {preds.shape}")
+
+        # Ensure shapes match for calculation
+        min_length = min(len(trues), len(preds))
+        if len(trues) != len(preds):
+            logger.warning(f"Shape mismatch: trues={len(trues)}, preds={len(preds)}. 
Trimming to {min_length}") + trues = trues[:min_length] + preds = preds[:min_length] + + # Calculate anomaly scores using MSE (following tutorial) + anomaly_scores = (trues - preds) ** 2 + + # Determine anomaly threshold + threshold = np.percentile(anomaly_scores, threshold_percentile) + anomalies = anomaly_scores > threshold + + logger.info(f"MOMENT Anomaly Detection: {np.sum(anomalies)} anomalies detected out of {len(anomalies)} timesteps") + logger.info(f"Anomaly threshold ({threshold_percentile}th percentile): {threshold:.6f}") + logger.info(f"Anomaly scores range: {np.min(anomaly_scores):.6f} - {np.max(anomaly_scores):.6f}") + + return anomalies + + + + async def _response_fn( + sensor_data_json_path: str, + engine_unit: int = 5, + sensor_name: str = "sensor_measurement_1" + ) -> str: + """ + Perform anomaly detection using MOMENT-1-large foundation model on JSON data from sql_retriever. + """ + # Set default parameters (not exposed to LLM)ensor + threshold_percentile = 95.0 + + try: + if not sensor_data_json_path.lower().endswith('.json'): + return "sensor_data_json_path must be a path to a JSON file (ending with .json)" + + if not os.path.exists(sensor_data_json_path): + return f"JSON file not found at path: {sensor_data_json_path}" + + # Load data from JSON file (output from sql_retriever) + from ..plotting.plot_utils import load_data_from_json + combined_df = load_data_from_json(sensor_data_json_path) + + if combined_df is None or combined_df.empty: + return f"Could not load data or data is empty from JSON file: {sensor_data_json_path}" + + # Filter for specific engine unit if specified + if 'unit_number' in combined_df.columns: + engine_data = combined_df[combined_df['unit_number'] == engine_unit] + if engine_data.empty: + return f"No data found for engine unit {engine_unit} in the provided JSON file. Available units: {sorted(combined_df['unit_number'].unique())}" + + # Sort by cycle for proper time series analysis + if 'time_in_cycles' in engine_data.columns: + engine_data = engine_data.sort_values('time_in_cycles').reset_index(drop=True) + + logger.info(f"Engine data shape: {engine_data.shape}") + logger.info(f"Analyzing sensor: {sensor_name}") + logger.info(f"MOMENT sequence length: 512") + + # Prepare time series data for MOMENT (single sensor) + sequences = prepare_time_series_data_for_moment(engine_data, sensor_name, max_seq_len=512) + + if sequences is None: + return "Failed to prepare time series data for MOMENT analysis" + + logger.info("Starting MOMENT-based anomaly detection...") + anomaly_indices = detect_anomalies_with_moment(sequences, threshold_percentile) + + # Add is_anomaly column to the original dataframe + # Handle case where MOMENT output length differs from input length + if len(anomaly_indices) == len(engine_data): + engine_data['is_anomaly'] = anomaly_indices + elif len(anomaly_indices) < len(engine_data): + # MOMENT output is shorter - pad with False for remaining timesteps + padded_anomalies = np.zeros(len(engine_data), dtype=bool) + padded_anomalies[:len(anomaly_indices)] = anomaly_indices + engine_data['is_anomaly'] = padded_anomalies + logger.warning(f"MOMENT output length ({len(anomaly_indices)}) < input length ({len(engine_data)}). Padded with False.") + else: + # MOMENT output is longer - trim to match input length + engine_data['is_anomaly'] = anomaly_indices[:len(engine_data)] + logger.warning(f"MOMENT output length ({len(anomaly_indices)}) > input length ({len(engine_data)}). 
Trimmed to match.") + + # Calculate summary statistics using the final anomaly column + final_anomalies = engine_data['is_anomaly'] + total_anomalies = np.sum(final_anomalies) + anomaly_rate = total_anomalies / len(final_anomalies) * 100 + + # Save results + os.makedirs(config.output_folder, exist_ok=True) + + # Save the original data with is_anomaly column added + results_filename = f"moment_anomaly_results_engine{engine_unit}.json" + results_filepath = os.path.join(config.output_folder, results_filename) + engine_data.to_json(results_filepath, orient='records', indent=2) + + # Build comprehensive response + response_parts = [ + "MOMENT-1-LARGE FOUNDATION MODEL ANOMALY DETECTION COMPLETED SUCCESSFULLY", + "", + f"Analysis Details:", + f" • Engine Unit: {engine_unit}", + f" • Source Data: {os.path.basename(sensor_data_json_path)}", + f" • Sensor Analyzed: {sensor_name}", + f" • Model: MOMENT-1-Large Foundation Model", + f" • Max Sequence Length: 512", + f" • Threshold Percentile: {threshold_percentile}%", + "", + f"Anomaly Detection Results:", + f" • Total Timesteps Analyzed: {len(final_anomalies)}", + f" • Anomalous Timesteps Detected: {total_anomalies}", + f" • Anomaly Rate: {anomaly_rate:.2f}%", + "", + f"Output Files Generated:", + f" • Enhanced Data with is_anomaly Column: {results_filepath}" + ] + + response_parts.extend([ + "", + f"Key Insights:", + f" • MOMENT-1-Large foundation model provides state-of-the-art time series anomaly detection", + f" • Pre-trained on diverse time series data for superior pattern recognition without additional training", + f" • {total_anomalies} anomalous time periods identified out of {len(final_anomalies)} analyzed sequences", + "", + f"Output Format:", + f" • Original sensor data with added 'is_anomaly' boolean column", + f" • Use the enhanced JSON file with plot_anomaly_tool for visualization", + "", + "MOMENT-1-LARGE ANOMALY DETECTION COMPLETE" + ]) + + return "\n".join(response_parts) + + except Exception as e: + error_msg = f"Error performing MOMENT-based anomaly detection: {e}" + logger.error(error_msg) + return error_msg + + description = """ + Perform state-of-the-art anomaly detection using MOMENT-1-Large foundation model on sensor data from JSON files. + Outputs detailed anomaly detection results. Use plot_anomaly_tool afterward for visualization. + + Input: + - sensor_data_json_path: File path to a JSON containing sensor data. The file must include timestamp and engine unit number columns along with sensor data columns. 
+    - engine_unit: Engine unit number to analyze (default: 5)
+    - sensor_name: Name of the specific sensor to analyze and plot (e.g., 'sensor_measurement_1', 'sensor_measurement_4', 'sensor_measurement_7', 'sensor_measurement_11') (default: 'sensor_measurement_1')
+
+    Output:
+    - JSON file containing original sensor data with added 'is_anomaly' boolean column
+    - Comprehensive analysis summary with key insights
+    """
+
+    yield FunctionInfo.from_fn(_response_fn,
+                               input_schema=MomentAnomalyDetectionInputSchema,
+                               description=description)
+    try:
+        pass
+    except GeneratorExit:
+        logger.info("moment based anomaly detection function exited early!")
+    finally:
+        logger.info("Cleaning up moment based anomaly detection workflow.")
\ No newline at end of file
diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/predict_rul_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/predict_rul_tool.py
new file mode 100644
index 00000000..e236545e
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/predictors/predict_rul_tool.py
@@ -0,0 +1,251 @@
+import json
+import logging
+import os
+import warnings
+import pickle
+import joblib
+import numpy as np
+
+from pydantic import Field, BaseModel
+
+from aiq.builder.builder import Builder
+from aiq.builder.function_info import FunctionInfo
+from aiq.cli.register_workflow import register_function
+from aiq.data_models.function import FunctionBaseConfig
+
+logger = logging.getLogger(__name__)
+
+def verify_json_path(file_path: str) -> str:
+    """
+    Verify that the input is a valid path to a JSON file.
+
+    Args:
+        file_path (str): Path to verify
+
+    Returns:
+        str: The verified file path, or a human-readable error message if
+             verification fails (problems are returned as strings, not raised)
+    """
+    if not isinstance(file_path, str):
+        return "Input must be a string path to a JSON file"
+
+    if not file_path.lower().endswith('.json'):
+        return "Input must be a path to a JSON file (ending with .json)"
+
+    if not os.path.exists(file_path):
+        return f"JSON file not found at path: {file_path}"
+
+    try:
+        with open(file_path, 'r') as f:
+            json.load(f)  # Verify file contains valid JSON
+    except json.JSONDecodeError:
+        return f"File at {file_path} does not contain valid JSON data"
+
+    return file_path
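+
+# Illustrative usage sketch (assumed path): verify_json_path reports problems as
+# strings instead of raising, so callers should compare the result against the
+# original path before using it:
+#
+#     checked = verify_json_path("./output_data/sensor_data_unit_5.json")
+#     if checked != "./output_data/sensor_data_unit_5.json":
+#         print(f"Verification failed: {checked}")
+
+class PredictRulToolConfig(FunctionBaseConfig, name="predict_rul_tool"):
+    """
+    AIQ Toolkit function to predict RUL (Remaining Useful Life) using trained models and provided data. 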
+    """
+    # Runtime configuration parameters
+    scaler_path: str = Field(description="Path to the trained StandardScaler model.", default="./models/scaler_model.pkl")
+    model_path: str = Field(description="Path to the trained XGBoost model.", default="./models/xgb_model_fd001.pkl")
+    output_folder: str = Field(description="The path to the output folder to save prediction results.", default="./output_data")
+
+@register_function(config_type=PredictRulToolConfig)
+async def predict_rul_tool(
+    config: PredictRulToolConfig, builder: Builder
+):
+    class PredictRulInputSchema(BaseModel):
+        json_file_path: str = Field(description="Path to a JSON file containing sensor measurements data for RUL prediction")
+
+    def load_data_from_json(json_path: str):
+        """Load data from JSON file into a pandas DataFrame."""
+        import pandas as pd
+        try:
+            with open(json_path, 'r') as f:
+                data = json.load(f)
+            return pd.DataFrame(data)
+        except FileNotFoundError:
+            logger.warning(f"JSON file not found at {json_path}")
+            return None
+        except json.JSONDecodeError:
+            logger.warning(f"Could not decode JSON from {json_path}")
+            return None
+        except Exception as e:
+            logger.warning(f"Error loading data from '{json_path}': {e}")
+            return None
+
+    def predict_rul_from_data(data_json_path: str, scaler_path: str, model_path: str, output_dir: str):
+        """
+        Load data and trained models to make RUL predictions.
+
+        Args:
+            data_json_path (str): Path to the input JSON data file.
+            scaler_path (str): Path to the trained StandardScaler model.
+            model_path (str): Path to the trained XGBoost model.
+            output_dir (str): Directory to save prediction results (unused - kept for compatibility).
+
+        Returns:
+            tuple: (predictions array, original file path)
+        """
+        import pandas as pd
+
+        # Suppress warnings
+        warnings.filterwarnings("ignore", message="X does not have valid feature names")
+
+        # Load the data
+        df = load_data_from_json(data_json_path)
+        if df is None or df.empty:
+            raise ValueError(f"Could not load data or data is empty from {data_json_path}")
+
+        # Prepare features for prediction (exclude non-feature columns if present)
+        required_columns = ['sensor_measurement_2',
+                            'sensor_measurement_3',
+                            'sensor_measurement_4',
+                            'sensor_measurement_7',
+                            'sensor_measurement_8',
+                            'sensor_measurement_11',
+                            'sensor_measurement_12',
+                            'sensor_measurement_13',
+                            'sensor_measurement_15',
+                            'sensor_measurement_17',
+                            'sensor_measurement_20',
+                            'sensor_measurement_21']
+        feature_columns = [col for col in df.columns if col in required_columns]
+        if not feature_columns:
+            raise ValueError(f"No valid feature columns found in the data. 
Available columns: {df.columns.tolist()}")
+
+        X_test = df[feature_columns].values
+        logger.info(f"Using {len(feature_columns)} features for prediction: {feature_columns}")
+
+        # Load the StandardScaler
+        try:
+            scaler_loaded = joblib.load(scaler_path)
+            logger.info(f"Successfully loaded scaler from {scaler_path}")
+        except Exception as e:
+            raise FileNotFoundError(f"Could not load scaler from {scaler_path}: {e}")
+
+        # Transform the test data using the loaded scaler
+        X_test_scaled = scaler_loaded.transform(X_test)
+
+        # Load the XGBoost model
+        try:
+            with open(model_path, 'rb') as f:
+                xgb_model = pickle.load(f)
+            logger.info(f"Successfully loaded XGBoost model from {model_path}")
+        except Exception as e:
+            raise FileNotFoundError(f"Could not load XGBoost model from {model_path}: {e}")
+
+        # Make predictions
+        y_pred = xgb_model.predict(X_test_scaled)
+        logger.info(f"Generated {len(y_pred)} RUL predictions")
+
+        # Create results DataFrame
+        results_df = df.copy()
+        results_df = results_df.rename(columns={'RUL': 'actual_RUL'})
+        results_df['predicted_RUL'] = y_pred
+
+        # Save results back to the original JSON file
+        results_json = results_df.to_dict('records')
+        with open(data_json_path, 'w') as f:
+            json.dump(results_json, f, indent=2)
+
+        logger.info(f"Prediction results saved back to original file: {data_json_path}")
+
+        return y_pred, data_json_path
+
+    async def _response_fn(json_file_path: str) -> str:
+        """
+        Process the input message and generate RUL predictions using trained models.
+        """
+        logger.info(f"Input message: {json_file_path}")
+        data_json_path = verify_json_path(json_file_path)
+        if data_json_path != json_file_path:
+            # verify_json_path reports problems as strings; surface them directly
+            return data_json_path
+        try:
+            predictions, output_filepath = predict_rul_from_data(
+                data_json_path=data_json_path,
+                scaler_path=config.scaler_path,
+                model_path=config.model_path,
+                output_dir=config.output_folder
+            )
+
+            # Generate summary statistics
+            avg_rul = np.mean(predictions)
+            min_rul = np.min(predictions)
+            max_rul = np.max(predictions)
+            std_rul = np.std(predictions)
+
+            # Create response with prediction summary
+            response = f"""RUL predictions generated successfully! 📊
+
+**Prediction Summary:**
+- **Total predictions:** {len(predictions)}
+- **Average RUL:** {avg_rul:.2f} cycles
+- **Minimum RUL:** {min_rul:.2f} cycles
+- **Maximum RUL:** {max_rul:.2f} cycles
+- **Standard Deviation:** {std_rul:.2f} cycles
+
+**Results saved to:** {output_filepath}
+
+The original JSON file has been updated in place: all columns from the original dataset are preserved, the predictions are added in a new 'predicted_RUL' column, and any existing RUL column is renamed to 'actual_RUL'."""
+
+            return response
+
+        except FileNotFoundError as e:
+            error_msg = f"Required file not found for RUL prediction: {e}. Please ensure all model files and data are available."
+            logger.warning(error_msg)
+            return error_msg
+        except ValueError as ve:
+            error_msg = f"Data validation error for RUL prediction: {ve}. Check the input data format."
+            logger.warning(error_msg)
+            return error_msg
+        except Exception as e:
+            error_msg = f"Error during RUL prediction: {e}"
+            logger.warning(error_msg)
+            return error_msg
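+
+    # Illustrative sketch of the expected input JSON: a list of records, each
+    # carrying (at least) the twelve sensor columns listed in required_columns:
+    #
+    #     [
+    #         {"unit_number": 1, "time_in_cycles": 1,
+    #          "sensor_measurement_2": 641.82, "sensor_measurement_3": 1589.70,
+    #          ..., "sensor_measurement_21": 23.419},
+    #         ...
+    #     ]
+
+    prompt = """
+    Predict RUL (Remaining Useful Life) for turbofan engines using trained machine learning models.
+
+    Input:
+    - Path to a JSON file containing sensor measurements.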
+ + Required columns: + * sensor_measurement_2 + * sensor_measurement_3 + * sensor_measurement_4 + * sensor_measurement_7 + * sensor_measurement_8 + * sensor_measurement_11 + * sensor_measurement_12 + * sensor_measurement_13 + * sensor_measurement_15 + * sensor_measurement_17 + * sensor_measurement_20 + * sensor_measurement_21 + + Process: + 1. Load and preprocess data using StandardScaler + 2. Generate predictions using XGBoost model + 3. Calculate summary statistics (mean, min, max, std dev) + 4. Save predictions to JSON file + + Output: + - RUL predictions for each engine unit + - Summary statistics of predictions + - Updated JSON file with predictions added as 'predicted_RUL' column + """ + yield FunctionInfo.from_fn(_response_fn, + input_schema=PredictRulInputSchema, + description=prompt) + try: + pass + except GeneratorExit: + logger.info("Predict RUL function exited early!") + finally: + logger.info("Cleaning up predict_rul_tool workflow.") diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/register.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/register.py new file mode 100644 index 00000000..7fd9a473 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/register.py @@ -0,0 +1,14 @@ +# pylint: disable=unused-import +# flake8: noqa + +# Import any tools which need to be automatically registered here +from .retrievers import generate_sql_query_and_retrieve_tool +from .predictors import predict_rul_tool +from .plotting import plot_distribution_tool +from .plotting import plot_comparison_tool +from .plotting import plot_line_chart_tool +from .plotting import plot_anomaly_tool +from .plotting import code_generation_assistant +from .predictors import moment_anomaly_detection_tool +from .evaluators import llm_judge_evaluator_register +from .evaluators import multimodal_llm_judge_evaluator_register diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/__init__.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/__init__.py new file mode 100644 index 00000000..1c67f641 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/__init__.py @@ -0,0 +1,15 @@ +""" +Retrievers package for predictive maintenance agent. + +This package contains components for data retrieval and SQL query generation +for predictive maintenance workflows. +""" + +from .vanna_manager import VannaManager +from .vanna_util import * +from . 
import generate_sql_query_and_retrieve_tool + +__all__ = [ + "VannaManager", + "generate_sql_query_and_retrieve_tool", +] \ No newline at end of file diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/generate_sql_query_and_retrieve_tool.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/generate_sql_query_and_retrieve_tool.py new file mode 100644 index 00000000..d7d7e0d0 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/generate_sql_query_and_retrieve_tool.py @@ -0,0 +1,229 @@ +import json +import logging +import os + +from pydantic import Field, BaseModel + +from aiq.builder.builder import Builder +from aiq.builder.function_info import FunctionInfo +from aiq.cli.register_workflow import register_function +from aiq.data_models.function import FunctionBaseConfig +from aiq.builder.framework_enum import LLMFrameworkEnum + +logger = logging.getLogger(__name__) + +class GenerateSqlQueryAndRetrieveToolConfig(FunctionBaseConfig, name="generate_sql_query_and_retrieve_tool"): + """ + AIQ Toolkit function to generate SQL queries and retrieve data. + """ + # Runtime configuration parameters + llm_name: str = Field(description="The name of the LLM to use for the function.") + embedding_name: str = Field(description="The name of the embedding to use for the function.") + vector_store_path: str = Field(description="The path to the vector store to use for the function.") + db_path: str = Field(description="The path to the SQL database to use for the function.") + output_folder: str = Field(description="The path to the output folder to use for the function.") + vanna_training_data_path: str = Field(description="The path to the YAML file containing Vanna training data.") + +@register_function(config_type=GenerateSqlQueryAndRetrieveToolConfig) +async def generate_sql_query_and_retrieve_tool( + config: GenerateSqlQueryAndRetrieveToolConfig, builder: Builder +): + """ + Generate a SQL query for a given question and retrieve the data from the database. + """ + class GenerateSqlQueryInputSchema(BaseModel): + input_question_in_english: str = Field(description="User's question in plain English to generate SQL query for") + + # Create Vanna instance + vanna_llm_config = builder.get_llm_config(config.llm_name) + vanna_embedder_config = builder.get_embedder_config(config.embedding_name) + + from langchain_core.prompts.chat import ChatPromptTemplate + + llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN) + + system_prompt = """ + You are an intelligent SQL query assistant that analyzes database query results and provides appropriate responses. + + Your responsibilities: + 1. Analyze the SQL query results and determine the best response format. + 2. For data extraction queries (multiple rows/complex data): recommend saving to JSON file and provide summary. + 3. For simple queries (single values, counts, yes/no, simple lookups): provide DIRECT answers without file storage. + 4. Always be helpful and provide context about the results. + 5. Generate a descriptive filename for data that should be saved. + + Guidelines: + + - If results contain multiple rows or complex data (>5 rows or >3 columns) AND the query is for data analysis/processing: recommend saving to file + - If results are simple (single value, count, or small lookup): provide only the direct answer even if a file was created for the results. 
+ - Always mention the SQL query that was executed. + - For files to be saved, suggest a descriptive filename based on the query content (e.g., "sensor_data_unit_5.json", "engine_performance_analysis.json"). + - Important: Do not use template variables or placeholders in your response. Provide actual values and descriptions. + + Be conversational and helpful. Explain what was found. + """ + # CRITICAL INSTRUCTION: If the question asks for unit numbers or IDs (e.g., "what are their unit numbers"): + # - Provide the COMPLETE list of ALL unit numbers from the data + # - Never say "not shown in sample" or "additional values" + # - Extract all unit_number values from the complete dataset, not just the sample + # - If you see unit numbers 40, 82, 174, 184 in the data, list ALL of them explicitly + # """ + + user_prompt = """ + Original Question: {original_question} + + SQL Query Executed: {sql_query} + + Query Results: + - Number of rows: {num_rows} + - Number of columns: {num_columns} + - Columns: {columns} + - Sample data (first few rows): {sample_data} + + Output directory: {output_dir} + """ + + prompt = ChatPromptTemplate.from_messages([("system", system_prompt), ("user", user_prompt)]) + output_message = prompt | llm + + from .vanna_manager import VannaManager + + # Create a VannaManager instance with full configuration + # This will trigger immediate Vanna instance creation and training during initialization + vanna_manager = VannaManager.create_with_config( + vanna_llm_config=vanna_llm_config, + vanna_embedder_config=vanna_embedder_config, + vector_store_path=config.vector_store_path, + db_path=config.db_path, + training_data_path=config.vanna_training_data_path + ) + + def get_vanna_instance(): + """ + Get the pre-initialized Vanna instance from VannaManager. + Training has already been completed during VannaManager initialization. 
+ """ + return vanna_manager.get_instance() + + async def _response_fn(input_question_in_english: str) -> str: + # Process the input_question_in_english and generate output using VannaManager + logger.info(f"RESPONSE: Starting question processing for: {input_question_in_english}") + + sql = None + try: + # CRITICAL: Ensure VannaManager instance is created before using it + # This creates the instance if it doesn't exist (lazy initialization) + vn_instance = get_vanna_instance() + + # Use VannaManager for safe SQL generation + sql = vanna_manager.generate_sql_safe(question=input_question_in_english) + logger.info(f"Generated SQL: {sql}") + + except Exception as e: + logger.error(f"RESPONSE: Exception during generate_sql_safe: {e}") + return f"Error generating SQL: {e}" + + # vn_instance is already available from above + + if not vn_instance.run_sql_is_set: + return f"Database is not connected via Vanna: {sql}" + + try: + df = vn_instance.run_sql(sql) + if df is None: + return f"Vanna run_sql returned None: {sql}" + if df.empty: + return f"No data found for the generated SQL: {sql}" + + num_rows = df.shape[0] + num_columns = df.shape[1] + columns = df.columns.tolist() + + # Get sample data (first 3 rows for preview) + sample_data = df.head(3).to_dict('records') + + # Use LLM to generate intelligent response + response = await output_message.ainvoke({ + "original_question": input_question_in_english, + "sql_query": sql, + "num_rows": num_rows, + "num_columns": num_columns, + "columns": ", ".join(columns), + "sample_data": json.dumps(sample_data, indent=2), + "output_dir": config.output_folder + }) + + # Check if LLM response suggests saving data (look for keywords or patterns) + llm_response = response.content if hasattr(response, 'content') else str(response) + + # Save data if it's complex (multiple rows or columns) or LLM suggests saving + should_save_data = ( + num_rows > 5 or + num_columns > 3 or + "save" in llm_response.lower() or + "saved" in llm_response.lower() or + "file" in llm_response.lower() + ) + + if should_save_data: + # Extract suggested filename from LLM response or use default + import re + filename_match = re.search(r'"([^"]+\.json)"', llm_response) + if filename_match: + suggested_filename = filename_match.group(1) + else: + # Generate a descriptive filename based on the question + import hashlib + # Clean the question for filename + clean_question = re.sub(r'[^\w\s-]', '', input_question_in_english.lower()) + clean_question = re.sub(r'\s+', '_', clean_question.strip())[:30] + if clean_question: + suggested_filename = f"{clean_question}_results.json" + else: + query_hash = hashlib.md5(input_question_in_english.encode()).hexdigest()[:8] + suggested_filename = f"sql_results_{query_hash}.json" + + sql_output_path = os.path.join(config.output_folder, suggested_filename) + + # Save the data to JSON file + os.makedirs(config.output_folder, exist_ok=True) + json_result = df.to_json(orient="records") + with open(sql_output_path, 'w') as f: + json.dump(json.loads(json_result), f, indent=4) + logger.info(f"Data saved to {sql_output_path}") + + # Clean up the LLM response and add file save confirmation + # Remove any object references that might have slipped through + cleaned_response = re.sub(r',\[object Object\],?', '', llm_response) + cleaned_response = re.sub(r'\[object Object\]', str(num_rows), cleaned_response) + + # If LLM didn't mention the actual saved path, append save confirmation + if sql_output_path not in cleaned_response: + cleaned_response += f"\n\n📁 Data has been 
saved to: {sql_output_path}" + cleaned_response += f"\n📊 File contains {num_rows} rows with columns: {', '.join(columns)}" + + return cleaned_response + + return llm_response + + except Exception as e: + return f"Error running SQL query '{sql}': {e}" + + description = """ + Use this tool to automatically generate SQL queries for the user's question, retrieve the data from the SQL database and provide a summary of the data or save the data in a JSON file. + Do not provide SQL query as input, only a question in plain english. + + Input: + - input_question_in_english: User's question or a question that you think is relevant to the user's question in plain english + + Output: Status of the generated SQL query's execution along with the output path. The tool will automatically generate descriptive filenames for saved data. + """ + yield FunctionInfo.from_fn(_response_fn, + input_schema=GenerateSqlQueryInputSchema, + description=description) + try: + pass + except GeneratorExit: + logger.info("Generate SQL query and retrieve function exited early!") + finally: + logger.info("Cleaning up generate_sql_query_and_retrieve_tool workflow.") diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/vanna_manager.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/vanna_manager.py new file mode 100644 index 00000000..6641c898 --- /dev/null +++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/vanna_manager.py @@ -0,0 +1,255 @@ +""" +VannaManager - A simplified manager for Vanna instances +""" +import os +import logging +import threading +import hashlib +from typing import Dict, Optional +from .vanna_util import NIMVanna, initVanna, CustomEmbeddingFunction + +logger = logging.getLogger(__name__) + +class VannaManager: + """ + A simplified singleton manager for Vanna instances. 
+ + Key features: + - Singleton pattern to ensure only one instance per configuration + - Thread-safe operations + - Simple instance management + """ + + _instances: Dict[str, 'VannaManager'] = {} + _lock = threading.Lock() + + def __new__(cls, config_key: str): + """Ensure singleton pattern per configuration""" + with cls._lock: + if config_key not in cls._instances: + logger.debug(f"VannaManager: Creating new singleton instance for config: {config_key}") + cls._instances[config_key] = super().__new__(cls) + cls._instances[config_key]._initialized = False + else: + logger.debug(f"VannaManager: Returning existing singleton instance for config: {config_key}") + return cls._instances[config_key] + + def __init__(self, config_key: str, vanna_llm_config=None, vanna_embedder_config=None, vector_store_path: str = None, db_path: str = None, training_data_path: str = None): + """Initialize the VannaManager and create Vanna instance immediately if all config is provided""" + if hasattr(self, '_initialized') and self._initialized: + return + + self.config_key = config_key + self.lock = threading.Lock() + + # Store configuration + self.vanna_llm_config = vanna_llm_config + self.vanna_embedder_config = vanna_embedder_config + self.vector_store_path = vector_store_path + self.db_path = db_path + self.training_data_path = training_data_path + + # Create and initialize Vanna instance immediately if all required config is provided + self.vanna_instance = None + if all([vanna_llm_config, vanna_embedder_config, vector_store_path, db_path]): + logger.debug(f"VannaManager: Initializing with immediate Vanna instance creation") + self.vanna_instance = self._create_instance() + else: + if any([vanna_llm_config, vanna_embedder_config, vector_store_path, db_path]): + logger.debug(f"VannaManager: Partial configuration provided, Vanna instance will be created later") + else: + logger.debug(f"VannaManager: No configuration provided, Vanna instance will be created later") + + self._initialized = True + logger.debug(f"VannaManager initialized for config: {config_key}") + + def get_instance(self, vanna_llm_config=None, vanna_embedder_config=None, vector_store_path: str = None, db_path: str = None, training_data_path: str = None) -> NIMVanna: + """ + Get the Vanna instance. If not created during init, create it now with provided parameters. 
+ """ + with self.lock: + if self.vanna_instance is None: + logger.debug(f"VannaManager: No instance created during init, creating now...") + + # Update configuration with provided parameters + self.vanna_llm_config = vanna_llm_config or self.vanna_llm_config + self.vanna_embedder_config = vanna_embedder_config or self.vanna_embedder_config + self.vector_store_path = vector_store_path or self.vector_store_path + self.db_path = db_path or self.db_path + self.training_data_path = training_data_path or self.training_data_path + + if all([self.vanna_llm_config, self.vanna_embedder_config, self.vector_store_path, self.db_path]): + self.vanna_instance = self._create_instance() + else: + raise RuntimeError("VannaManager: Missing required configuration parameters") + else: + logger.debug(f"VannaManager: Returning pre-initialized Vanna instance (ID: {id(self.vanna_instance)})") + + # Show vector store status for pre-initialized instances + try: + if os.path.exists(self.vector_store_path): + list_of_folders = [d for d in os.listdir(self.vector_store_path) + if os.path.isdir(os.path.join(self.vector_store_path, d))] + logger.debug(f"VannaManager: Vector store contains {len(list_of_folders)} collections/folders") + if list_of_folders: + logger.debug(f"VannaManager: Vector store folders: {list_of_folders}") + else: + logger.debug(f"VannaManager: Vector store directory does not exist") + except Exception as e: + logger.warning(f"VannaManager: Could not check vector store status: {e}") + + return self.vanna_instance + + def _create_instance(self) -> NIMVanna: + """ + Create a new Vanna instance using the stored configuration. + """ + logger.info(f"VannaManager: Creating instance for {self.config_key}") + logger.debug(f"VannaManager: Vector store path: {self.vector_store_path}") + logger.debug(f"VannaManager: Database path: {self.db_path}") + logger.debug(f"VannaManager: Training data path: {self.training_data_path}") + + # Create instance + vn_instance = NIMVanna( + VectorConfig={ + "client": "persistent", + "path": self.vector_store_path, + "embedding_function": CustomEmbeddingFunction( + api_key=os.getenv("NVIDIA_API_KEY"), + model=self.vanna_embedder_config.model_name) + }, + LLMConfig={ + "api_key": os.getenv("NVIDIA_API_KEY"), + "model": self.vanna_llm_config.model_name + } + ) + + # Connect to database + logger.debug(f"VannaManager: Connecting to SQLite database...") + vn_instance.connect_to_sqlite(self.db_path) + + # Set configuration - allow LLM to see data for database introspection + vn_instance.allow_llm_to_see_data = True + logger.debug(f"VannaManager: Set allow_llm_to_see_data = True") + + # Initialize if needed (check if vector store is empty) + needs_init = self._needs_initialization() + if needs_init: + logger.info("VannaManager: Vector store needs initialization, starting training...") + try: + initVanna(vn_instance, self.training_data_path) + logger.info("VannaManager: Vector store initialization complete") + except Exception as e: + logger.error(f"VannaManager: Error during initialization: {e}") + raise + else: + logger.debug("VannaManager: Vector store already initialized, skipping training") + + logger.info(f"VannaManager: Instance created successfully") + return vn_instance + + def _needs_initialization(self) -> bool: + """ + Check if the vector store needs initialization by checking if it's empty. 
+ """ + logger.debug(f"VannaManager: Checking if vector store needs initialization...") + logger.debug(f"VannaManager: Vector store path: {self.vector_store_path}") + + try: + if not os.path.exists(self.vector_store_path): + logger.debug(f"VannaManager: Vector store directory does not exist -> needs initialization") + return True + + # Check if there are any subdirectories (ChromaDB creates subdirectories when data is stored) + list_of_folders = [d for d in os.listdir(self.vector_store_path) + if os.path.isdir(os.path.join(self.vector_store_path, d))] + + logger.debug(f"VannaManager: Found {len(list_of_folders)} folders in vector store") + if list_of_folders: + logger.debug(f"VannaManager: Vector store folders: {list_of_folders}") + logger.debug(f"VannaManager: Vector store is populated -> skipping initialization") + return False + else: + logger.debug(f"VannaManager: Vector store is empty -> needs initialization") + return True + + except Exception as e: + logger.warning(f"VannaManager: Could not check vector store status: {e}") + logger.warning(f"VannaManager: Defaulting to needs initialization = True") + return True + + def generate_sql_safe(self, question: str) -> str: + """ + Generate SQL with error handling. + """ + with self.lock: + if self.vanna_instance is None: + raise RuntimeError("VannaManager: No instance available") + + try: + logger.debug(f"VannaManager: Generating SQL for question: {question}") + + # Generate SQL with allow_llm_to_see_data=True for database introspection + sql = self.vanna_instance.generate_sql(question=question, allow_llm_to_see_data=True) + + # Validate SQL response + if not sql or sql.strip() == "": + raise ValueError("Empty SQL response") + + return sql + + except Exception as e: + logger.error(f"VannaManager: Error in SQL generation: {e}") + raise + + def force_reset(self): + """ + Force reset the instance (useful for cleanup). + """ + with self.lock: + if self.vanna_instance: + logger.debug(f"VannaManager: Resetting instance for {self.config_key}") + self.vanna_instance = None + + def get_stats(self) -> Dict: + """ + Get manager statistics. + """ + return { + "config_key": self.config_key, + "instance_id": id(self.vanna_instance) if self.vanna_instance else None, + "has_instance": self.vanna_instance is not None + } + + @classmethod + def create_with_config(cls, vanna_llm_config, vanna_embedder_config, vector_store_path: str, db_path: str, training_data_path: str = None): + """ + Class method to create a VannaManager with full configuration. + Uses create_config_key to ensure singleton behavior based on configuration. 
+        """
+        config_key = create_config_key(vanna_llm_config, vanna_embedder_config, vector_store_path, db_path)
+
+        # Create instance with just config_key (singleton pattern)
+        instance = cls(config_key)
+
+        # If this is a new instance that hasn't been configured yet, set the configuration
+        if not hasattr(instance, 'vanna_llm_config') or instance.vanna_llm_config is None:
+            instance.vanna_llm_config = vanna_llm_config
+            instance.vanna_embedder_config = vanna_embedder_config
+            instance.vector_store_path = vector_store_path
+            instance.db_path = db_path
+            instance.training_data_path = training_data_path
+
+            # Create Vanna instance immediately if all config is available
+            if instance.vanna_instance is None:
+                logger.debug(f"VannaManager: Creating Vanna instance for existing singleton")
+                instance.vanna_instance = instance._create_instance()
+
+        return instance
+
+def create_config_key(vanna_llm_config, vanna_embedder_config, vector_store_path: str, db_path: str) -> str:
+    """
+    Create a unique configuration key for the VannaManager singleton.
+    """
+    config_str = f"{vanna_llm_config.model_name}_{vanna_embedder_config.model_name}_{vector_store_path}_{db_path}"
+    return hashlib.md5(config_str.encode()).hexdigest()[:12]
diff --git a/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/vanna_util.py b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/vanna_util.py
new file mode 100644
index 00000000..1fe93d9a
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/src/predictive_maintenance_agent/retrievers/vanna_util.py
@@ -0,0 +1,518 @@
+from vanna.chromadb import ChromaDB_VectorStore
+from vanna.base import VannaBase
+from langchain_nvidia import ChatNVIDIA
+from tqdm import tqdm
+
+class NIMCustomLLM(VannaBase):
+    def __init__(self, config=None):
+        VannaBase.__init__(self, config=config)
+
+        if not config:
+            raise ValueError("config must be passed")
+
+        # default parameters - can be overridden using config
+        self.temperature = 0.7
+
+        if "temperature" in config:
+            self.temperature = config["temperature"]
+
+        # If only config is passed
+        if "api_key" not in config:
+            raise ValueError("config must contain a NIM api_key")
+
+        if "model" not in config:
+            raise ValueError("config must contain a NIM model")
+
+        api_key = config["api_key"]
+        model = config["model"]
+
+        # Initialize ChatNVIDIA client
+        self.client = ChatNVIDIA(
+            api_key=api_key,
+            model=model,
+            temperature=self.temperature,
+        )
+        self.model = model
+
+    def system_message(self, message: str) -> dict:
+        return {"role": "system", "content": message + "\n DO NOT PRODUCE MARKDOWN, ONLY RESPOND IN PLAIN TEXT"}
+
+    def user_message(self, message: str) -> dict:
+        return {"role": "user", "content": message}
+
+    def assistant_message(self, message: str) -> dict:
+        return {"role": "assistant", "content": message}
+
+    def submit_prompt(self, prompt, **kwargs) -> str:
+        if prompt is None:
+            raise Exception("Prompt is None")
+
+        if len(prompt) == 0:
+            raise Exception("Prompt is empty")
+
+        # Count the number of tokens in the message log
+        # Use 4 as an approximation for the number of characters per token
+        num_tokens = 0
+        for message in prompt:
+            num_tokens += len(message["content"]) / 4
+        print(f"Using model {self.model} for {num_tokens} tokens (approx)")
+
+        response = self.client.invoke(prompt)
+        return response.content
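+
+# Illustrative sketch (assumed key names): NIMVanna below composes the ChromaDB
+# vector store and the NIM-backed LLM via multiple inheritance, so a working
+# instance is built from two config dicts:
+#
+#     vn = NIMVanna(
+#         VectorConfig={"client": "persistent", "path": "./db/vectors"},
+#         LLMConfig={"api_key": "<NVIDIA_API_KEY>", "model": "<nim-model-name>"},
+#     )
+#     vn.connect_to_sqlite("./data/nasa_turbo.db")
+
+class NIMVanna(ChromaDB_VectorStore, NIMCustomLLM):
+    def __init__(self, VectorConfig = None, LLMConfig = None): 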
ChromaDB_VectorStore.__init__(self, config=VectorConfig) + NIMCustomLLM.__init__(self, config=LLMConfig) + +class CustomEmbeddingFunction: + """ + A class that can be used as a replacement for chroma's DefaultEmbeddingFunction. + It takes in input (text or list of texts) and returns embeddings using NVIDIA's API. + """ + + def __init__(self, api_key, model="nvidia/nv-embedqa-e5-v5"): + """ + Initialize the embedding function with the API key and model name. + + Parameters: + - api_key (str): The API key for authentication. + - model (str): The model name to use for embeddings (default is "nvidia/nv-embedqa-e5-v5"). + """ + from langchain_nvidia import NVIDIAEmbeddings + + self.embeddings = NVIDIAEmbeddings( + api_key=api_key, + model_name=model, + input_type="query", + truncate="NONE" + ) + + def __call__(self, input): + """ + Call method to make the object callable, as required by chroma's EmbeddingFunction interface. + + Parameters: + - input (str or list): The input data for which embeddings need to be generated. + + Returns: + - embedding (list): The embedding vector(s) for the input data. + """ + # Ensure input is a list, as required by the API + input_data = [input] if isinstance(input, str) else input + + # Generate embeddings + embeddings = [] + for text in input_data: + embedding = self.embeddings.embed_query(text) + embeddings.append(embedding) + + return embeddings[0] if len(embeddings) == 1 and isinstance(input, str) else embeddings + + def name(self): + """ + Returns a custom name for the embedding function. + + Returns: + str: The name of the embedding function. + """ + return "NVIDIA Embedding Function" + +def initVannaBackup(vn): + """ + Backup initialization function for Vanna with hardcoded NASA Turbofan Engine training data. + + This function provides the original hardcoded training approach for NASA Turbofan Engine + predictive maintenance queries. Use this as a fallback if the JSON-based training fails. 
+
+    Args:
+        vn: Vanna instance to be trained and configured
+
+    Returns:
+        None: Modifies the Vanna instance in-place
+
+    Example:
+        >>> vn = NIMVanna(VectorConfig=vector_config, LLMConfig=llm_config)
+        >>> vn.connect_to_sqlite("path/to/nasa_turbo.db")
+        >>> initVannaBackup(vn)
+        >>> # Vanna is now ready with hardcoded NASA Turbofan training
+    """
+    import json
+    import os
+
+    # Get and train DDL from sqlite_master
+    df_ddl = vn.run_sql("SELECT type, sql FROM sqlite_master WHERE sql is not null")
+    for ddl in df_ddl['sql'].to_list():
+        vn.train(ddl=ddl)
+
+    # Fallback to default NASA Turbofan training
+    fd_datasets = ["FD001", "FD002", "FD003", "FD004"]
+    for fd in fd_datasets:
+        vn.train(ddl=f"""
+            CREATE TABLE IF NOT EXISTS RUL_{fd} (
+                "unit_number" INTEGER,
+                "RUL" INTEGER
+            )
+        """)
+
+    sensor_columns = """
+        "unit_number" INTEGER,
+        "time_in_cycles" INTEGER,
+        "operational_setting_1" REAL,
+        "operational_setting_2" REAL,
+        "operational_setting_3" REAL,
+        "sensor_measurement_1" REAL,
+        "sensor_measurement_2" REAL,
+        "sensor_measurement_3" REAL,
+        "sensor_measurement_4" REAL,
+        "sensor_measurement_5" REAL,
+        "sensor_measurement_6" REAL,
+        "sensor_measurement_7" REAL,
+        "sensor_measurement_8" REAL,
+        "sensor_measurement_9" REAL,
+        "sensor_measurement_10" REAL,
+        "sensor_measurement_11" REAL,
+        "sensor_measurement_12" REAL,
+        "sensor_measurement_13" REAL,
+        "sensor_measurement_14" REAL,
+        "sensor_measurement_15" REAL,
+        "sensor_measurement_16" REAL,
+        "sensor_measurement_17" INTEGER,
+        "sensor_measurement_18" INTEGER,
+        "sensor_measurement_19" REAL,
+        "sensor_measurement_20" REAL,
+        "sensor_measurement_21" REAL
+    """
+
+    for fd in fd_datasets:
+        vn.train(ddl=f"CREATE TABLE IF NOT EXISTS train_{fd} ({sensor_columns})")
+        vn.train(ddl=f"CREATE TABLE IF NOT EXISTS test_{fd} ({sensor_columns})")
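+
+    # Illustrative sketch: after f-string substitution, the DDL trained for
+    # FD001 above expands to (abridged):
+    #
+    #     CREATE TABLE IF NOT EXISTS train_FD001 (
+    #         "unit_number" INTEGER,
+    #         "time_in_cycles" INTEGER,
+    #         "operational_setting_1" REAL,
+    #         ...
+    #         "sensor_measurement_21" REAL
+    #     )
+
+    # Default documentation for NASA Turbofan
+    dataset_documentation = """
+    This SQL database contains train and test splits of four different datasets: FD001, FD002, FD003, FD004.
+    Each dataset consists of multiple multivariate time series from different engines of the same type. 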
+ + DATABASE STRUCTURE: + The data is organized into separate tables for each dataset: + + Training Tables: train_FD001, train_FD002, train_FD003, train_FD004 + Test Tables: test_FD001, test_FD002, test_FD003, test_FD004 + RUL Tables: RUL_FD001, RUL_FD002, RUL_FD003, RUL_FD004 + + Each training and test table contains 26 columns with identical structure: + - unit_number: INTEGER - Identifier for each engine unit + - time_in_cycles: INTEGER - Time step in operational cycles + - operational_setting_1: REAL - First operational setting affecting performance + - operational_setting_2: REAL - Second operational setting affecting performance + - operational_setting_3: REAL - Third operational setting affecting performance + - sensor_measurement_1 through sensor_measurement_21: REAL/INTEGER - Twenty-one sensor measurements + + Each RUL table contains 2 columns: + - unit_number: INTEGER - Engine unit identifier + - RUL: INTEGER - Remaining Useful Life value for that test unit + + QUERY PATTERNS: + + Table References: + - "train_FD001" or "dataset train_FD001" → Use table train_FD001 + - "test_FD002" or "dataset test_FD002" → Use table test_FD002 + - "FD003" (without train/test prefix) → Determine from context whether to use train_FD003 or test_FD003 + - For RUL queries: Use specific RUL table (RUL_FD001, RUL_FD002, RUL_FD003, or RUL_FD004) + + Counting Patterns: + - "How many units" → Use COUNT(DISTINCT unit_number) to count unique engines + - "How many records/data points/measurements/entries/rows" → Use COUNT(*) to count all records + + RUL Handling (CRITICAL DISTINCTION): + + 1. GROUND TRUTH RUL (for test data): + - Use when query asks for "actual RUL", "true RUL", "ground truth", or "what is the RUL" + - Query specific RUL table: SELECT RUL FROM RUL_FD001 WHERE unit_number=N + - For time-series with ground truth: ((SELECT MAX(time_in_cycles) FROM test_FDxxx WHERE unit_number=N) + (SELECT RUL FROM RUL_FDxxx WHERE unit_number=N) - time_in_cycles) + + 2. PREDICTED/CALCULATED RUL (for training data or prediction requests): + - Use when query asks to "predict RUL", "calculate RUL", "estimate RUL", or "find RUL" for training data + - For training data: Calculate as remaining cycles until failure = (MAX(time_in_cycles) - current_time_in_cycles + 1) + - Training RUL query: SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FDxxx + + DEFAULT BEHAVIOR: If unclear, assume user wants PREDICTION (since this is more common) + + Column Names (consistent across all training and test tables): + - unit_number: Engine identifier + - time_in_cycles: Time step + - operational_setting_1, operational_setting_2, operational_setting_3: Operational settings + - sensor_measurement_1, sensor_measurement_2, ..., sensor_measurement_21: Sensor readings + + IMPORTANT NOTES: + - Each dataset (FD001, FD002, FD003, FD004) has its own separate RUL table + - RUL tables do NOT have a 'dataset' column - they are dataset-specific by table name + - Training tables contain data until engine failure + - Test tables contain data that stops before failure + - RUL tables provide the actual remaining cycles for test units + + ENGINE OPERATION CONTEXT: + Each engine starts with different degrees of initial wear and manufacturing variation. + The engine operates normally at the start of each time series and develops a fault at some point during the series. + In the training set, the fault grows in magnitude until system failure. 
+ In the test set, the time series ends some time prior to system failure. + The objective is to predict the number of remaining operational cycles before failure in the test set. + """ + vn.train(documentation=dataset_documentation) + + # Default training for NASA Turbofan + queries = [ + # 1. JOIN pattern between training and RUL tables + "SELECT t.unit_number, t.time_in_cycles, t.operational_setting_1, r.RUL FROM train_FD001 AS t JOIN RUL_FD001 AS r ON t.unit_number = r.unit_number WHERE t.unit_number = 1 ORDER BY t.time_in_cycles", + + # 2. Aggregation with multiple statistical functions + "SELECT unit_number, AVG(sensor_measurement_1) AS avg_sensor1, MAX(sensor_measurement_2) AS max_sensor2, MIN(sensor_measurement_3) AS min_sensor3 FROM train_FD002 GROUP BY unit_number", + + # 3. Test table filtering with time-based conditions + "SELECT * FROM test_FD003 WHERE time_in_cycles > 50 AND sensor_measurement_1 > 500 ORDER BY unit_number, time_in_cycles", + + # 4. Window function for predicted RUL calculation on training data + "SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FD004 WHERE unit_number <= 3 ORDER BY unit_number, time_in_cycles", + + # 5. Direct RUL table query with filtering + "SELECT unit_number, RUL FROM RUL_FD001 WHERE RUL > 100 ORDER BY RUL DESC" + ] + + for query in tqdm(queries, desc="Training NIMVanna"): + vn.train(sql=query) + + # Essential question-SQL training pairs (covering key RUL distinction) + vn.train(question="Get time cycles and operational setting 1 for unit 1 from test FD001", + sql="SELECT time_in_cycles, operational_setting_1 FROM test_FD001 WHERE unit_number = 1") + + # Ground Truth RUL (from RUL tables) + vn.train(question="What is the actual remaining useful life for unit 1 in test dataset FD001", + sql="SELECT RUL FROM RUL_FD001 WHERE unit_number = 1") + + # Predicted RUL (calculated for training data) + vn.train(question="Predict the remaining useful life for each time cycle of unit 1 in training dataset FD001", + sql="SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FD001 WHERE unit_number = 1 ORDER BY time_in_cycles") + + vn.train(question="How many units are in the training data for FD002", + sql="SELECT COUNT(DISTINCT unit_number) FROM train_FD002") + + # Additional RUL distinction training + vn.train(question="Calculate RUL for training data in FD003", + sql="SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FD003 ORDER BY unit_number, time_in_cycles") + + vn.train(question="Get ground truth RUL values for all units in test FD002", + sql="SELECT unit_number, RUL FROM RUL_FD002 ORDER BY unit_number") + +def chunk_documentation(text: str, max_chars: int = 1500) -> list: + """ + Split long documentation into smaller chunks to avoid token limits. 
+ + Args: + text: The documentation text to chunk + max_chars: Maximum characters per chunk (approximate) + + Returns: + List of text chunks + """ + if len(text) <= max_chars: + return [text] + + chunks = [] + # Split by paragraphs first + paragraphs = text.split('\n\n') + current_chunk = "" + + for paragraph in paragraphs: + # If adding this paragraph would exceed the limit, save current chunk and start new one + if len(current_chunk) + len(paragraph) + 2 > max_chars and current_chunk: + chunks.append(current_chunk.strip()) + current_chunk = paragraph + else: + if current_chunk: + current_chunk += "\n\n" + paragraph + else: + current_chunk = paragraph + + # Add the last chunk if it exists + if current_chunk.strip(): + chunks.append(current_chunk.strip()) + + # If any chunk is still too long, split it further + final_chunks = [] + for chunk in chunks: + if len(chunk) > max_chars: + # Split long chunk into sentences + sentences = chunk.split('. ') + temp_chunk = "" + for sentence in sentences: + if len(temp_chunk) + len(sentence) + 2 > max_chars and temp_chunk: + final_chunks.append(temp_chunk.strip() + ".") + temp_chunk = sentence + else: + if temp_chunk: + temp_chunk += ". " + sentence + else: + temp_chunk = sentence + if temp_chunk.strip(): + final_chunks.append(temp_chunk.strip()) + else: + final_chunks.append(chunk) + + return final_chunks + +def initVanna(vn, training_data_path: str = None): + """ + Initialize and train a Vanna instance for SQL generation using configurable training data. + + This function configures a Vanna SQL generation agent with training data loaded from a YAML file, + making it scalable for different SQL data sources with different contexts. + + Args: + vn: Vanna instance to be trained and configured + training_data_path: Path to YAML file containing training data. If None, no training is applied. 
+
+    Returns:
+        None: Modifies the Vanna instance in-place
+
+    Example:
+        >>> from vanna.chromadb import ChromaDB_VectorStore
+        >>> class NIMVanna(ChromaDB_VectorStore, NIMCustomLLM):
+        ...     def __init__(self, config=None):
+        ...         ChromaDB_VectorStore.__init__(self, config=config)
+        ...         NIMCustomLLM.__init__(self, config=config)
+        >>> vn = NIMVanna(config)
+        >>> vn.connect_to_sqlite("path/to/database.db")
+        >>> initVanna(vn, "path/to/training_data.yaml")
+        >>> # Vanna is now ready to generate SQL queries
+    """
+    import os
+    import logging
+
+    logger = logging.getLogger(__name__)
+    logger.info("=== Starting Vanna initialization ===")
+
+    # Get and train DDL from sqlite_master
+    logger.info("Loading DDL from sqlite_master...")
+    try:
+        df_ddl = vn.run_sql("SELECT type, sql FROM sqlite_master WHERE sql is not null")
+        ddl_count = len(df_ddl)
+        logger.info(f"Found {ddl_count} DDL statements in sqlite_master")
+
+        for i, ddl in enumerate(df_ddl['sql'].to_list(), 1):
+            logger.debug(f"Training DDL {i}/{ddl_count}: {ddl[:100]}...")
+            vn.train(ddl=ddl)
+
+        logger.info(f"Successfully trained {ddl_count} DDL statements from sqlite_master")
+    except Exception as e:
+        logger.error(f"Error loading DDL from sqlite_master: {e}")
+        raise
+
+    # Load and apply training data from YAML file
+    if training_data_path:
+        logger.info(f"Training data path provided: {training_data_path}")
+
+        if os.path.exists(training_data_path):
+            logger.info("Training data file exists, loading YAML...")
+
+            try:
+                import yaml
+                with open(training_data_path, 'r') as f:
+                    training_data = yaml.safe_load(f)
+
+                logger.info("Successfully loaded YAML training data")
+                logger.info(f"Training data keys: {list(training_data.keys()) if training_data else 'None'}")
+
+                # Train synthetic DDL statements
+                synthetic_ddl = training_data.get("synthetic_ddl", [])
+                logger.info(f"Found {len(synthetic_ddl)} synthetic DDL statements")
+
+                ddl_trained = 0
+                for i, ddl_statement in enumerate(synthetic_ddl, 1):
+                    if ddl_statement.strip():  # Only train non-empty statements
+                        logger.debug(f"Training synthetic DDL {i}: {ddl_statement[:100]}...")
+                        vn.train(ddl=ddl_statement)
+                        ddl_trained += 1
+                    else:
+                        logger.warning(f"Skipping empty synthetic DDL statement at index {i}")
+
+                logger.info(f"Successfully trained {ddl_trained}/{len(synthetic_ddl)} synthetic DDL statements")
+
+                # Train documentation with chunking
+                documentation = training_data.get("documentation", "")
+                doc_chunks = []  # initialize here so the summary below works even when documentation is empty
+                if documentation.strip():
+                    logger.info(f"Training documentation ({len(documentation)} characters)")
+                    logger.debug(f"Documentation preview: {documentation[:200]}...")
+
+                    # Chunk documentation to avoid token limits
+                    doc_chunks = chunk_documentation(documentation)
+                    logger.info(f"Split documentation into {len(doc_chunks)} chunks")
+
+                    for i, chunk in enumerate(doc_chunks, 1):
+                        try:
+                            logger.debug(f"Training documentation chunk {i}/{len(doc_chunks)} ({len(chunk)} chars)")
+                            vn.train(documentation=chunk)
+                        except Exception as e:
+                            logger.error(f"Error training documentation chunk {i}: {e}")
+                            # Continue with other chunks
+
+                    logger.info(f"Successfully trained {len(doc_chunks)} documentation chunks")
+                else:
+                    logger.warning("No documentation found or documentation is empty")
+
+                # Train example queries
+                example_queries = training_data.get("example_queries", [])
+                logger.info(f"Found {len(example_queries)} example queries")
+
+                queries_trained = 0
+                for i, query_data in enumerate(example_queries, 1):
+                    sql = query_data.get("sql", "")
+                    if sql.strip():  # Only train non-empty queries
+                        logger.debug(f"Training example query {i}: {sql[:100]}...")
+                        vn.train(sql=sql)
+                        queries_trained += 1
+                    else:
+                        logger.warning(f"Skipping empty example query at index {i}")
+
+                logger.info(f"Successfully trained {queries_trained}/{len(example_queries)} example queries")
+
+                # Train question-SQL pairs
+                question_sql_pairs = training_data.get("question_sql_pairs", [])
+                logger.info(f"Found {len(question_sql_pairs)} question-SQL pairs")
+
+                pairs_trained = 0
+                for i, pair in enumerate(question_sql_pairs, 1):
+                    question = pair.get("question", "")
+                    sql = pair.get("sql", "")
+                    if question.strip() and sql.strip():  # Only train non-empty pairs
+                        logger.debug(f"Training question-SQL pair {i}: Q='{question[:50]}...' SQL='{sql[:50]}...'")
+                        vn.train(question=question, sql=sql)
+                        pairs_trained += 1
+                    else:
+                        if not question.strip():
+                            logger.warning(f"Skipping question-SQL pair {i}: empty question")
+                        if not sql.strip():
+                            logger.warning(f"Skipping question-SQL pair {i}: empty SQL")
+
+                logger.info(f"Successfully trained {pairs_trained}/{len(question_sql_pairs)} question-SQL pairs")
+
+                # Summary
+                total_trained = ddl_trained + len(doc_chunks) + queries_trained + pairs_trained
+                logger.info("=== Training Summary ===")
+                logger.info(f"  Synthetic DDL: {ddl_trained}")
+                logger.info(f"  Documentation chunks: {len(doc_chunks)}")
+                logger.info(f"  Example queries: {queries_trained}")
+                logger.info(f"  Question-SQL pairs: {pairs_trained}")
+                logger.info(f"  Total items trained: {total_trained}")
+
+            except yaml.YAMLError as e:
+                logger.error(f"Error parsing YAML file {training_data_path}: {e}")
+                raise
+            except Exception as e:
+                logger.error(f"Error loading training data from {training_data_path}: {e}")
+                raise
+        else:
+            logger.warning(f"Training data file does not exist: {training_data_path}")
+            logger.warning("Proceeding without YAML training data")
+    else:
+        logger.info("No training data path provided, skipping YAML training")
+
+    logger.info("=== Vanna initialization completed ===")
+
diff --git a/industries/manufacturing/predictive_maintenance_agent/test_pdm_workflow.py b/industries/manufacturing/predictive_maintenance_agent/test_pdm_workflow.py
new file mode 100644
index 00000000..8e7c933f
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/test_pdm_workflow.py
@@ -0,0 +1,91 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
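+
+# How to run these e2e tests (a minimal sketch; assumes pytest with asyncio
+# support, e.g. pytest-asyncio, is configured for this repository):
+#
+#   export NVIDIA_API_KEY=<your-api-key>
+#   pytest -m e2e test_pdm_workflow.py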
+
+import importlib
+import importlib.resources
+import inspect
+import logging
+import os
+from pathlib import Path
+
+import pytest
+from predictive_maintenance_agent import register
+
+from aiq.runtime.loader import load_workflow
+
+logger = logging.getLogger(__name__)
+
+def setup_environment():
+    """Set up required environment variables for the workflow."""
+    # Set PWD_PATH to current directory as mentioned in README
+    current_dir = Path(__file__).parent.absolute()
+    os.environ["PWD_PATH"] = str(current_dir)
+
+    # Ensure NVIDIA_API_KEY is set (it must be provided externally; never hardcode secrets)
+    if not os.environ.get("NVIDIA_API_KEY"):
+        pytest.skip("NVIDIA_API_KEY must be set in the environment to run these e2e tests")
+
+    logger.info(f"PWD_PATH set to: {os.environ['PWD_PATH']}")
+
+
+async def run_workflow_with_prompt(prompt: str):
+    """
+    Helper function to run the workflow with a given prompt.
+
+    Args:
+        prompt: The prompt to send to the agent workflow
+
+    Returns:
+        str: The result from the workflow execution
+    """
+    # Set up environment variables
+    setup_environment()
+
+    # Use our own package for config file location
+    package_name = inspect.getmodule(register).__package__
+    config_file: Path = importlib.resources.files(package_name).parent.parent.joinpath("configs", "config-reasoning.yml").absolute()
+
+    async with load_workflow(config_file) as workflow:
+        async with workflow.run(prompt) as runner:
+            result = await runner.result(to_type=str)
+
+    return result
+
+
+@pytest.mark.e2e
+async def test_data_retrieval_and_plotting():
+    """Test retrieving time in cycles and operational setting 1 for unit 1 and plotting."""
+
+    prompt = "Retrieve the time in cycles and operational setting 1 from the FD001 test table for unit number 1 and plot its value vs time."
+
+    result = await run_workflow_with_prompt(prompt)
+    result_lower = result.lower()
+
+    # Verify that the workflow completed successfully and generated output
+    assert "saved output to" in result_lower or "plot" in result_lower or "chart" in result_lower
+    logger.info(f"Test 1 completed successfully: {result}")
+
+
+@pytest.mark.e2e
+async def test_rul_distribution_analysis():
+    """Test retrieving real RUL values and plotting their distribution."""
+
+    prompt = "Retrieve real RUL of each unit in the FD001 test dataset. Then plot a distribution of it."
+
+    result = await run_workflow_with_prompt(prompt)
+    result_lower = result.lower()
+
+    # Verify that the workflow completed successfully and generated output
+    assert "saved output to" in result_lower or "plot" in result_lower or "distribution" in result_lower
+    logger.info(f"Test 2 completed successfully: {result}")
diff --git a/industries/manufacturing/predictive_maintenance_agent/vanna_training_data.yaml b/industries/manufacturing/predictive_maintenance_agent/vanna_training_data.yaml
new file mode 100644
index 00000000..a942b02d
--- /dev/null
+++ b/industries/manufacturing/predictive_maintenance_agent/vanna_training_data.yaml
@@ -0,0 +1,174 @@
+# Vanna SQL Agent Training Configuration
+# =====================================
+# This YAML file contains all the training data needed to configure a Vanna SQL generation agent
+# for your specific database and domain. Each section serves a different purpose in training
+# the agent to understand your database structure and generate appropriate SQL queries.
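+#
+# How this file is consumed (a minimal sketch; assumes the NIMVanna class and
+# the initVanna() helper from this package, plus a local SQLite database):
+#
+#   vn = NIMVanna(config)                          # LLM + ChromaDB vector store
+#   vn.connect_to_sqlite("path/to/nasa_turbo.db")  # point at the Turbofan database
+#   initVanna(vn, "vanna_training_data.yaml")      # trains DDL, docs, and examples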
+ +training_config: + # Basic metadata about this training configuration + description: "Training data for NASA Turbofan Engine predictive maintenance SQL generation" + version: "1.0" + # You should update these fields to describe your specific domain and use case + +# SYNTHETIC DDL STATEMENTS +# ======================== +# Purpose: Define table structures that may not be fully captured in the actual database schema +# When to use: +# - When you have tables that aren't in the main database but need to be referenced +# - When you want to ensure the agent knows about specific table structures +# - When you need to supplement incomplete schema information from sqlite_master +# How to populate: +# - Include CREATE TABLE statements for any tables the agent should know about +# - Focus on tables that are central to your domain but might be missing from auto-discovery +# - Use exact DDL syntax as if you were creating the tables manually +# - Include all columns with proper data types to help the agent understand structure +synthetic_ddl: + - "CREATE TABLE IF NOT EXISTS RUL_FD001 (\"unit_number\" INTEGER, \"RUL\" INTEGER)" + - "CREATE TABLE IF NOT EXISTS RUL_FD002 (\"unit_number\" INTEGER, \"RUL\" INTEGER)" + - "CREATE TABLE IF NOT EXISTS RUL_FD003 (\"unit_number\" INTEGER, \"RUL\" INTEGER)" + - "CREATE TABLE IF NOT EXISTS RUL_FD004 (\"unit_number\" INTEGER, \"RUL\" INTEGER)" + - "CREATE TABLE IF NOT EXISTS train_FD001 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS test_FD001 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS train_FD002 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" 
REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS test_FD002 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS train_FD003 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS test_FD003 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS train_FD004 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, 
\"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + - "CREATE TABLE IF NOT EXISTS test_FD004 (\"unit_number\" INTEGER, \"time_in_cycles\" INTEGER, \"operational_setting_1\" REAL, \"operational_setting_2\" REAL, \"operational_setting_3\" REAL, \"sensor_measurement_1\" REAL, \"sensor_measurement_2\" REAL, \"sensor_measurement_3\" REAL, \"sensor_measurement_4\" REAL, \"sensor_measurement_5\" REAL, \"sensor_measurement_6\" REAL, \"sensor_measurement_7\" REAL, \"sensor_measurement_8\" REAL, \"sensor_measurement_9\" REAL, \"sensor_measurement_10\" REAL, \"sensor_measurement_11\" REAL, \"sensor_measurement_12\" REAL, \"sensor_measurement_13\" REAL, \"sensor_measurement_14\" REAL, \"sensor_measurement_15\" REAL, \"sensor_measurement_16\" REAL, \"sensor_measurement_17\" INTEGER, \"sensor_measurement_18\" INTEGER, \"sensor_measurement_19\" REAL, \"sensor_measurement_20\" REAL, \"sensor_measurement_21\" REAL)" + +# DOMAIN DOCUMENTATION +# ==================== +# Purpose: Provide context about your database structure, business rules, and query patterns +# When to use: Always - this is crucial for helping the agent understand your domain +# How to populate: +# - Use second-person language ("You are working with...", "When you see...") +# - Explain the business context and what the data represents +# - Define important query patterns and conventions specific to your domain +# - Include any business rules or logical distinctions the agent should understand +# - Explain column meanings, especially if they're not self-evident +# - Provide guidance on default behaviors when queries are ambiguous +# - Think of this as training documentation for a new team member who needs to understand your database +documentation: | + You are working with a SQL database containing train and test splits of four different datasets: FD001, FD002, FD003, FD004. + Each dataset consists of multiple multivariate time series from different engines of the same type. 
+ + DATABASE STRUCTURE YOU'LL WORK WITH: + The data is organized into separate tables for each dataset that you'll need to query: + + Training Tables: train_FD001, train_FD002, train_FD003, train_FD004 + Test Tables: test_FD001, test_FD002, test_FD003, test_FD004 + RUL Tables: RUL_FD001, RUL_FD002, RUL_FD003, RUL_FD004 + + When you query training and test tables, you'll find 26 columns with identical structure: + - unit_number: INTEGER - Identifier for each engine unit + - time_in_cycles: INTEGER - Time step in operational cycles + - operational_setting_1: REAL - First operational setting affecting performance + - operational_setting_2: REAL - Second operational setting affecting performance + - operational_setting_3: REAL - Third operational setting affecting performance + - sensor_measurement_1 through sensor_measurement_21: REAL/INTEGER - Twenty-one sensor measurements + + When you query RUL tables, you'll find 2 columns: + - unit_number: INTEGER - Engine unit identifier + - RUL: INTEGER - Remaining Useful Life value for that test unit + + QUERY PATTERNS YOU SHOULD USE: + + Table References: + - When you see "train_FD001" or "dataset train_FD001" → Use table train_FD001 + - When you see "test_FD002" or "dataset test_FD002" → Use table test_FD002 + - When you see "FD003" (without train/test prefix) → Determine from context whether to use train_FD003 or test_FD003 + - For RUL queries: Use the specific RUL table (RUL_FD001, RUL_FD002, RUL_FD003, or RUL_FD004) + + Counting Patterns You Should Follow: + - When asked "How many units" → Use COUNT(DISTINCT unit_number) to count unique engines + - When asked "How many records/data points/measurements/entries/rows" → Use COUNT(*) to count all records + + RUL Handling (CRITICAL - YOU MUST DISTINGUISH): + + 1. GROUND TRUTH RUL (for test data): + - Use when you see requests for "actual RUL", "true RUL", "ground truth", or "what is the RUL" + - You should query the specific RUL table: SELECT RUL FROM RUL_FD001 WHERE unit_number=N + - For time-series with ground truth: ((SELECT MAX(time_in_cycles) FROM test_FDxxx WHERE unit_number=N) + (SELECT RUL FROM RUL_FDxxx WHERE unit_number=N) - time_in_cycles) + + 2. 
PREDICTED/CALCULATED RUL (for training data or prediction requests): + - Use when you see requests to "predict RUL", "calculate RUL", "estimate RUL", or "find RUL" for training data + - For training data: You should calculate as remaining cycles until failure = (MAX(time_in_cycles) - current_time_in_cycles + 1) + - Your training RUL query should be: SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FDxxx + + DEFAULT BEHAVIOR YOU SHOULD FOLLOW: If unclear, assume the user wants PREDICTION (since this is more common) + + Column Names You'll Use (consistent across all training and test tables): + - unit_number: Engine identifier + - time_in_cycles: Time step + - operational_setting_1, operational_setting_2, operational_setting_3: Operational settings + - sensor_measurement_1, sensor_measurement_2, ..., sensor_measurement_21: Sensor readings + + IMPORTANT NOTES FOR YOUR QUERIES: + - Each dataset (FD001, FD002, FD003, FD004) has its own separate RUL table + - RUL tables do NOT have a 'dataset' column - they are dataset-specific by table name + - Training tables contain data until engine failure + - Test tables contain data that stops before failure + - RUL tables provide the actual remaining cycles for test units + + ENGINE OPERATION CONTEXT FOR YOUR UNDERSTANDING: + You are working with engine data where each engine starts with different degrees of initial wear and manufacturing variation. + The engine operates normally at the start of each time series and develops a fault at some point during the series. + In the training set, the fault grows in magnitude until system failure. + In the test set, the time series ends some time prior to system failure. + Your objective is to help predict the number of remaining operational cycles before failure in the test set. + +# EXAMPLE QUERIES +# =============== +# Purpose: Teach the agent common SQL patterns and query structures for your domain +# When to use: Include 3-7 diverse examples that cover the main query patterns you expect +# How to populate: +# - Choose queries that represent different SQL concepts (JOINs, aggregations, window functions, etc.) 
+# - Focus on domain-specific patterns that are unique to your use case +# - Include complex queries that demonstrate proper table relationships +# - Add a description to explain what pattern each query demonstrates +# - Prioritize quality over quantity - better to have 5 great examples than 20 mediocre ones +example_queries: + - description: "JOIN pattern between training and RUL tables" + sql: "SELECT t.unit_number, t.time_in_cycles, t.operational_setting_1, r.RUL FROM train_FD001 AS t JOIN RUL_FD001 AS r ON t.unit_number = r.unit_number WHERE t.unit_number = 1 ORDER BY t.time_in_cycles" + + - description: "Aggregation with multiple statistical functions" + sql: "SELECT unit_number, AVG(sensor_measurement_1) AS avg_sensor1, MAX(sensor_measurement_2) AS max_sensor2, MIN(sensor_measurement_3) AS min_sensor3 FROM train_FD002 GROUP BY unit_number" + + - description: "Test table filtering with time-based conditions" + sql: "SELECT * FROM test_FD003 WHERE time_in_cycles > 50 AND sensor_measurement_1 > 500 ORDER BY unit_number, time_in_cycles" + + - description: "Window function for predicted RUL calculation on training data" + sql: "SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FD004 WHERE unit_number <= 3 ORDER BY unit_number, time_in_cycles" + + - description: "Direct RUL table query with filtering" + sql: "SELECT unit_number, RUL FROM RUL_FD001 WHERE RUL > 100 ORDER BY RUL DESC" + +# QUESTION-SQL PAIRS +# ================== +# Purpose: Train the agent to map natural language questions to specific SQL queries +# When to use: Include 5-10 pairs that cover the most common user questions in your domain +# How to populate: +# - Use realistic questions that your users would actually ask +# - Cover edge cases and domain-specific terminology +# - Include both simple and complex question patterns +# - Focus on questions that demonstrate important business logic distinctions +# - Include variations of similar questions to improve robustness +# - Make sure questions cover different table types and query patterns +question_sql_pairs: + - question: "Get time cycles and operational setting 1 for unit 1 from test FD001" + sql: "SELECT time_in_cycles, operational_setting_1 FROM test_FD001 WHERE unit_number = 1" + + - question: "What is the actual remaining useful life for unit 1 in test dataset FD001" + sql: "SELECT RUL FROM RUL_FD001 WHERE unit_number = 1" + + - question: "Predict the remaining useful life for each time cycle of unit 1 in training dataset FD001" + sql: "SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FD001 WHERE unit_number = 1 ORDER BY time_in_cycles" + + - question: "How many units are in the training data for FD002" + sql: "SELECT COUNT(DISTINCT unit_number) FROM train_FD002" + + - question: "Calculate RUL for training data in FD003" + sql: "SELECT unit_number, time_in_cycles, (MAX(time_in_cycles) OVER (PARTITION BY unit_number) - time_in_cycles + 1) AS predicted_RUL FROM train_FD003 ORDER BY unit_number, time_in_cycles" + + - question: "Get ground truth RUL values for all units in test FD002" + sql: "SELECT unit_number, RUL FROM RUL_FD002 ORDER BY unit_number" \ No newline at end of file
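+
+# Sanity check after training (a hypothetical snippet; assumes a trained
+# NIMVanna instance `vn` and Vanna's generate_sql() API):
+#
+#   sql = vn.generate_sql("How many units are in the training data for FD002")
+#   # expected: SELECT COUNT(DISTINCT unit_number) FROM train_FD002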