Merged

21 commits
f0cf760
adding refactored task generation. updated prompts to ask for json ou…
kohankhaki Aug 26, 2025
06da910
fixed retry, json processing, and max token.
kohankhaki Sep 5, 2025
70d7b06
Merge branch 'fix-anthropic-client' into agentic_task_gen
kohankhaki Sep 5, 2025
0ca1c22
switichin to two phase task generation.
kohankhaki Sep 5, 2025
396feac
switichin to two phase task generation. part 2.
kohankhaki Sep 5, 2025
b166e4c
updated agentic config and readme.
kohankhaki Sep 5, 2025
084b68c
simplified task generations.
kohankhaki Sep 5, 2025
c155d74
simplified task generation.
kohankhaki Sep 5, 2025
52b4d2a
fixed mypy errors.
kohankhaki Sep 7, 2025
d1e1812
ruff fix.
kohankhaki Sep 7, 2025
4d237f7
updated saved file name for solutions.
kohankhaki Sep 7, 2025
38d825d
added extra details to agent solution messages.
kohankhaki Sep 9, 2025
c5afb81
fixed prompts.
kohankhaki Oct 9, 2025
9195b93
fixed output dir name to include area name.
kohankhaki Oct 16, 2025
57d2d2a
fixed task solver output dir name.
kohankhaki Oct 17, 2025
3292299
upgraded json handling, and model call.
kohankhaki Oct 17, 2025
df4860b
updated readme to include latest agentic changes.
kohankhaki Oct 17, 2025
3ca8acf
fixed readme to include info on output and input paths for agentic pi…
kohankhaki Nov 4, 2025
4ef564f
added attribute desc for docstrings for task solver classes.
kohankhaki Nov 4, 2025
5687cb4
added a comment on output_dir.
kohankhaki Nov 7, 2025
a379ecc
Merge branch 'main' into agentic_task_gen
kohankhaki Nov 7, 2025
105 changes: 98 additions & 7 deletions README.md
@@ -73,18 +73,109 @@ Utilize the capability and the corresponding subject LLM score to select or gene
```bash
python -m src.run_lbo
```

### Agentic Generation Scripts

These scripts implement the multi-agent debate workflow for automated generation of areas, capabilities, tasks, and solutions. All configurable parameters are defined in `src/cfg/agentic_config.yaml`.

#### Understanding Pipeline Tags

The pipeline uses **auto-generated tags** to organize outputs from each step. Understanding how tags work is essential for running the pipeline:

- **Tag Format**: Tags are automatically generated timestamps in the format `_YYYYMMDD_HHMMSS` (e.g., `_20251104_143022`)
- **Auto-Generation**: When you run a step (e.g., Generate Areas), the script automatically creates a tag and includes it in the output path
- **Finding Tags**: After running a step, check the console output or the output directory to see the generated tag. The tag appears in the file path where outputs are saved
- **Using Tags**: To run the next step in the pipeline, you need to specify the tag from the previous step's output:
- Step 2 (Generate Capabilities) needs `areas_tag` from Step 1
- Step 3 (Generate Tasks) needs `capabilities_tag` from Step 2
- Step 4 (Generate Solutions) needs `tasks_tag` from Step 3
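For reference, the tag format can be reproduced with a few lines (the `make_run_tag` helper below is illustrative, not part of the codebase):

```python
from datetime import datetime

def make_run_tag() -> str:
    # Tags are timestamps prefixed with an underscore, e.g. _20251104_143022.
    return datetime.now().strftime("_%Y%m%d_%H%M%S")

print(make_run_tag())
```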

**Example Workflow**:
1. Run `python -m src.agentic_area_generator` → outputs to `.../areas/_20251104_143022/areas.json`
2. Use the tag `_20251104_143022` in the next step:
```bash
python -m src.agentic_capability_generator pipeline_tags.areas_tag=_20251104_143022
```
3. The capability generator outputs to `.../capabilities/_20251104_150315/...`
4. Use this new tag for the next step, and so on.
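Because the timestamp tags sort lexicographically in chronological order, the most recent tag under a step's output directory can be located programmatically. A minimal sketch, assuming the directory layout described above (`latest_tag` is a hypothetical helper, not part of the pipeline):

```python
from pathlib import Path

def latest_tag(step_dir: str) -> str:
    # Tag directory names (_YYYYMMDD_HHMMSS) sort lexicographically
    # in chronological order, so the last one is the most recent run.
    tags = sorted(p.name for p in Path(step_dir).expanduser().iterdir() if p.is_dir())
    if not tags:
        raise FileNotFoundError(f"no tag directories under {step_dir}")
    return tags[-1]
```

For example, `latest_tag("~/outputs/my_domain/exp1/areas")` would return the tag of the most recent area-generation run under that (hypothetical) root.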

---

#### 1. Generate Areas
Generate domain areas using the scientist–moderator debate system:
```bash
# Generate capability areas
python -m src.agentic_area_generator
```

This step auto-generates a tag (e.g., `_20251104_143022`) and writes its results to the following location.

**Output location:**
```
~/<output_dir>/<domain>/<exp_id>/areas/<areas_tag>/areas.json
```
Where:
- `<output_dir>` comes from `global_cfg.output_dir`
- `<domain>` comes from `global_cfg.domain` (spaces replaced with underscores)
- `<exp_id>` comes from `exp_cfg.exp_id`
- `<areas_tag>` is the auto-generated tag for this run (use this tag in Step 2)
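The path is a mechanical substitution of those config values; a sketch of the assembly (the function and argument names here are ours, not part of the codebase):

```python
from pathlib import Path

def areas_output_path(output_dir: str, domain: str, exp_id: str, areas_tag: str) -> Path:
    # Spaces in the domain name are replaced with underscores.
    return (Path(output_dir).expanduser() / domain.replace(" ", "_")
            / exp_id / "areas" / areas_tag / "areas.json")
```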

#### 2. Generate Capabilities
Generate capabilities for each area:
```bash
# Use the areas_tag from Step 1 (Generate Areas) output
python -m src.agentic_capability_generator pipeline_tags.areas_tag=_YYYYMMDD_HHMMSS pipeline_tags.resume_capabilities_tag=_YYYYMMDD_HHMMSS
```

**Options:**
- `pipeline_tags.areas_tag` specifies which set of areas to use when generating capabilities. This should be the `<areas_tag>` from the output of Step 1 (Generate Areas).
- `pipeline_tags.resume_capabilities_tag` (optional) resumes a previous capability generation run.

This step auto-generates a new tag for the capabilities output.

**Output location:**
```
~/<output_dir>/<domain>/<exp_id>/capabilities/<capabilities_tag>/<area>/capabilities.json
```
Where:
- `<capabilities_tag>` is the auto-generated tag for this run (use this tag in Step 3)


#### 3. Generate Tasks
Generate evaluation tasks for a specific capabilities tag:
```bash
# Use the capabilities_tag from Step 2 (Generate Capabilities) output
python -m src.agentic_task_generator pipeline_tags.capabilities_tag=_YYYYMMDD_HHMMSS pipeline_tags.resume_tasks_tag=_YYYYMMDD_HHMMSS
```

**Options:**
- `pipeline_tags.capabilities_tag` specifies which set of capabilities to use when generating tasks. This should be the `<capabilities_tag>` from the output of Step 2 (Generate Capabilities).
- `pipeline_tags.resume_tasks_tag` (optional) resumes a previous task generation run.

This step auto-generates a new tag for the tasks output.

**Output location:**
```
~/<output_dir>/<domain>/<exp_id>/tasks/<tasks_tag>/[<area>]-[<capability>]/tasks.json
```
Where:
- `<tasks_tag>` is the auto-generated tag for this run (use this tag in Step 4)

#### 4. Generate Solutions
Solve generated tasks using the multi-agent debate system:
```bash
# Use the tasks_tag from Step 3 (Generate Tasks) output
python -m src.agentic_task_solver pipeline_tags.tasks_tag=_YYYYMMDD_HHMMSS pipeline_tags.resume_solutions_tag=_YYYYMMDD_HHMMSS
```

**Options:**
- `pipeline_tags.tasks_tag` specifies which set of tasks to solve. This should be the `<tasks_tag>` from the output of Step 3 (Generate Tasks).
- `pipeline_tags.resume_solutions_tag` (optional) resumes a previous solution generation run.

This step auto-generates a new tag for the solutions output.

**Output location:**
```
~/<output_dir>/<domain>/<exp_id>/task_solutions/<solutions_tag>/[<area>]-[<capability>]/<task_id>_solution.json
```
Where:
- `<solutions_tag>` is the auto-generated tag for this run
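A solutions run can then be post-processed by walking this layout. A hedged sketch that collects solutions keyed by task id (`load_solutions` is our own helper; the contents of each JSON file are loaded as-is, with no schema assumed):

```python
import json
from pathlib import Path

def load_solutions(solutions_root: str) -> dict:
    # Each solution lives at <solutions_tag>/[<area>]-[<capability>]/<task_id>_solution.json;
    # key the result by task_id.
    solutions = {}
    for path in Path(solutions_root).glob("*/*_solution.json"):
        task_id = path.stem[: -len("_solution")]
        solutions[task_id] = json.loads(path.read_text())
    return solutions
```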
2 changes: 1 addition & 1 deletion src/agentic_capability_generator.py
@@ -63,7 +63,7 @@ def main(cfg: DictConfig) -> None:
error_msg = "No areas_tag provided. Please provide pipeline_tags.areas_tag=<tag> to specify which areas to use."
log.warning(error_msg)
span.update(
-                level="WARNING",
+                level="ERROR",
status_message="Missing areas_tag",
metadata={"areas_tag_missing": error_msg},
)
117 changes: 98 additions & 19 deletions src/agentic_task_generator.py
@@ -2,39 +2,118 @@

import asyncio
import logging
import os
import traceback

import hydra
import openlit
from langfuse import Langfuse
from omegaconf import DictConfig, OmegaConf

-from .task_generation import generate_tasks
+from src.task_generation import generate_tasks


# Suppress OpenTelemetry console output
os.environ["OTEL_LOG_LEVEL"] = "ERROR"
os.environ["OTEL_METRICS_EXPORTER"] = "none"
os.environ["OTEL_PYTHON_LOG_CORRELATION"] = "false"
os.environ["OTEL_PYTHON_LOG_LEVEL"] = "ERROR"

log = logging.getLogger("agentic_task_gen")

lf = Langfuse()
openlit.init(tracer=lf._otel_tracer, disable_batch=True, disable_metrics=True)


@hydra.main(version_base=None, config_path="cfg", config_name="agentic_config")
def main(cfg: DictConfig) -> None:
"""Run the multi-agent task generation system."""
log.info("Starting multi-agent task generation")
log.info("Configuration:\n%s", OmegaConf.to_yaml(cfg, resolve=True))

# Check for capabilities_tag parameter
capabilities_tag = cfg.pipeline_tags.capabilities_tag
if capabilities_tag:
log.info(f"Using capabilities from tag: {capabilities_tag}")
else:
log.warning(
"No capabilities_tag provided. Please provide --pipeline_tags.capabilities_tag=<tag> to specify which capabilities to use."
)
return

try:
asyncio.run(generate_tasks(cfg, capabilities_tag))
except Exception as e:
log.error(f"Task generation failed: {e}")
log.error(f"Full traceback: {traceback.format_exc()}")
raise
resume_tag = getattr(cfg.pipeline_tags, "resume_tasks_tag", None)
domain_name = cfg.global_cfg.domain
exp_id = cfg.exp_cfg.exp_id

with lf.start_as_current_span(
name=f"ace_agentic_task_generation:{domain_name}:{exp_id}"
) as span:
try:
msg = "Starting multi-agent task generation"
log.info(msg)
span.update(metadata={"system_started": msg})

config_yaml = OmegaConf.to_yaml(cfg, resolve=True)
msg = "Configuration loaded"
log.info("Configuration:\n%s", config_yaml)
span.update(
metadata={
"configuration_loaded": msg,
"config": config_yaml,
"domain": domain_name,
"exp_id": exp_id,
}
)

if capabilities_tag:
msg = f"Using capabilities from tag: {capabilities_tag}"
log.info(msg)
span.update(
metadata={
"capabilities_tag_found": msg,
"capabilities_tag": capabilities_tag,
}
)
else:
error_msg = "No capabilities_tag provided. Please provide pipeline_tags.capabilities_tag=<tag> to specify which capabilities to use."
log.warning(error_msg)
span.update(
level="ERROR",
status_message="Missing capabilities_tag",
metadata={"capabilities_tag_missing": error_msg},
)
return

if resume_tag:
msg = f"Resuming task generation from tag: {resume_tag}"
log.info(msg)
span.update(
metadata={"resume_tag_found": msg, "resume_tag": resume_tag}
)

span.update_trace(
metadata={
"domain": domain_name,
"exp_id": exp_id,
"capabilities_tag": capabilities_tag,
"resume_tag": resume_tag,
"config": config_yaml,
},
tags=["agentic_task_generation", exp_id],
)

asyncio.run(generate_tasks(cfg, capabilities_tag, lf, resume_tag))

msg = "Multi-agent task generation completed successfully"
log.info(msg)
span.update(metadata={"system_completed": msg})

except Exception as e:
error_msg = f"Task generation failed: {e}"
traceback_msg = f"Full traceback: {traceback.format_exc()}"

log.error(error_msg)
log.error(traceback_msg)

span.update(
level="ERROR",
status_message=str(e),
metadata={
"system_error": error_msg,
"error": str(e),
"traceback": traceback_msg,
},
)

raise


if __name__ == "__main__":
125 changes: 125 additions & 0 deletions src/agentic_task_solver.py
@@ -0,0 +1,125 @@
"""Multi-agent debate system for solving generated tasks."""

import asyncio
import logging
import os
import traceback

import hydra
import openlit
from langfuse import Langfuse
from omegaconf import DictConfig, OmegaConf

from src.task_solver import solve_tasks


# Suppress OpenTelemetry console output
os.environ["OTEL_LOG_LEVEL"] = "ERROR"
os.environ["OTEL_METRICS_EXPORTER"] = "none"
os.environ["OTEL_PYTHON_LOG_CORRELATION"] = "false"
os.environ["OTEL_PYTHON_LOG_LEVEL"] = "ERROR"

log = logging.getLogger("agentic_task_solver")

langfuse_client = Langfuse()
openlit.init(
tracer=langfuse_client._otel_tracer, disable_batch=True, disable_metrics=True
)


@hydra.main(version_base=None, config_path="cfg", config_name="agentic_config")
def main(cfg: DictConfig) -> None:
"""Run the multi-agent debate-based task solving system."""
tasks_tag = cfg.pipeline_tags.get("tasks_tag")
resume_tag = getattr(cfg.pipeline_tags, "resume_solutions_tag", None)
domain_name = cfg.global_cfg.domain
exp_id = cfg.exp_cfg.exp_id

with langfuse_client.start_as_current_span(
name=f"ace_agentic_task_solver:{domain_name}:{exp_id}"
) as span:
try:
msg = "Starting multi-agent debate-based task solver"
log.info(msg)
span.update(metadata={"system_started": msg})

config_yaml = OmegaConf.to_yaml(cfg, resolve=True)
msg = "Configuration loaded"
log.info("Configuration:\n%s", config_yaml)
span.update(
metadata={
"configuration_loaded": msg,
"config": config_yaml,
"domain": domain_name,
"exp_id": exp_id,
}
)

if tasks_tag:
msg = f"Using tasks from tag: {tasks_tag}"
log.info(msg)
span.update(
metadata={
"tasks_tag_found": msg,
"tasks_tag": tasks_tag,
}
)
else:
error_msg = "No tasks_tag provided. Please provide pipeline_tags.tasks_tag=<tag> to specify which tasks to solve."
log.warning(error_msg)
span.update(
level="ERROR",
status_message="Missing tasks_tag",
metadata={"tasks_tag_missing": error_msg},
)
return

if resume_tag:
msg = f"Resuming task solving from tag: {resume_tag}"
log.info(msg)
span.update(
metadata={"resume_tag_found": msg, "resume_tag": resume_tag}
)

span.update_trace(
metadata={
"domain": domain_name,
"exp_id": exp_id,
"tasks_tag": tasks_tag,
"resume_tag": resume_tag,
"config": config_yaml,
},
tags=["agentic_task_solver", exp_id],
)

asyncio.run(solve_tasks(cfg, tasks_tag, langfuse_client, resume_tag))

msg = "Multi-agent debate-based task solving completed successfully"
log.info(msg)
span.update(metadata={"system_completed": msg})

except Exception as e:
error_msg = f"Task solving failed: {e}"
traceback_msg = f"Full traceback: {traceback.format_exc()}"

log.error(error_msg)
log.error(traceback_msg)

span.update(
level="ERROR",
status_message=str(e),
metadata={
"system_error": error_msg,
"error": str(e),
"traceback": traceback_msg,
},
)

raise

finally:
langfuse_client.flush()


if __name__ == "__main__":
main()