diff --git a/pyproject.toml b/pyproject.toml index bf148234f..21f8f967a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,10 @@ no_implicit_reexport = true disallow_untyped_defs = false +[[tool.mypy.overrides]] +module = "uipath._cli._interactive.*" +disable_error_code = ["misc", "unused-ignore"] + [tool.pydantic-mypy] init_forbid_extra = true init_typed = true diff --git a/samples/calculator/evaluationSets/comprehensive_calculator_tests.json b/samples/calculator/evaluationSets/comprehensive_calculator_tests.json new file mode 100644 index 000000000..f8c941cb2 --- /dev/null +++ b/samples/calculator/evaluationSets/comprehensive_calculator_tests.json @@ -0,0 +1,118 @@ +{ + "id": "calc-comprehensive-001", + "fileName": "comprehensive_eval_set.json", + "evaluatorRefs": ["exact-match-eval", "json-similarity-eval"], + "name": "Comprehensive Calculator Tests", + "batchSize": 10, + "timeoutMinutes": 10, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": [ + { + "id": "add-basic", + "name": "Basic Addition", + "inputs": { + "a": 5, + "b": 3, + "operator": "+" + }, + "expectedOutput": { + "result": 8.0 + }, + "expectedAgentBehavior": "Add two positive numbers", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "sub-basic", + "name": "Basic Subtraction", + "inputs": { + "a": 10, + "b": 4, + "operator": "-" + }, + "expectedOutput": { + "result": 6.0 + }, + "expectedAgentBehavior": "Subtract smaller from larger", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "mul-basic", + "name": "Basic Multiplication", + "inputs": { + "a": 7, + "b": 6, + "operator": "*" + }, + "expectedOutput": { + "result": 42.0 + }, + "expectedAgentBehavior": "Multiply two integers", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "div-basic", + "name": "Basic Division", + "inputs": { + "a": 15, + "b": 3, + "operator": "/" + }, + "expectedOutput": { + "result": 5.0 + }, + "expectedAgentBehavior": "Divide evenly", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "div-zero", + "name": "Division by Zero", + "inputs": { + "a": 10, + "b": 0, + "operator": "/" + }, + "expectedOutput": { + "result": 0.0 + }, + "expectedAgentBehavior": "Handle division by zero", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + ] +} \ No newline at end of file diff --git a/samples/calculator/evaluators/exact_match.json 
b/samples/calculator/evaluators/exact_match.json new file mode 100644 index 000000000..4750fc819 --- /dev/null +++ b/samples/calculator/evaluators/exact_match.json @@ -0,0 +1,10 @@ +{ + "id": "exact-match-eval", + "name": "Exact Match Evaluator", + "description": "Tests for exact output matches", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" +} diff --git a/samples/calculator/evaluators/json_similarity.json b/samples/calculator/evaluators/json_similarity.json new file mode 100644 index 000000000..b1fac450e --- /dev/null +++ b/samples/calculator/evaluators/json_similarity.json @@ -0,0 +1,10 @@ +{ + "id": "json-similarity-eval", + "name": "JSON Similarity Evaluator", + "description": "Tests for structural JSON similarity with tolerance", + "category": 0, + "type": 6, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" +} diff --git a/src/uipath/_cli/_interactive/__init__.py b/src/uipath/_cli/_interactive/__init__.py new file mode 100644 index 000000000..3fe5a81ab --- /dev/null +++ b/src/uipath/_cli/_interactive/__init__.py @@ -0,0 +1,5 @@ +"""Interactive evaluation CLI module.""" + +from ._main import launch_interactive_cli + +__all__ = ["launch_interactive_cli"] diff --git a/src/uipath/_cli/_interactive/_discovery.py b/src/uipath/_cli/_interactive/_discovery.py new file mode 100644 index 000000000..08ea55d84 --- /dev/null +++ b/src/uipath/_cli/_interactive/_discovery.py @@ -0,0 +1,48 @@ +"""Discovery utilities for finding eval sets and evaluators.""" +# type: ignore + +import json +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + + +class DiscoveryMixin: + """Mixin for file discovery operations.""" + + def _discover_files(self: "InteractiveEvalCLI") -> None: + """Quickly discover eval sets and evaluators.""" + # Clear existing lists to avoid duplicates + self.eval_sets.clear() + self.evaluators.clear() + + # Find eval sets from evaluationSets folder + eval_sets_dir = self.project_root / "evaluationSets" + if eval_sets_dir.exists(): + for eval_file in eval_sets_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Check if it's an eval set by presence of "evaluations" array + if "evaluations" in data and isinstance( + data.get("evaluations"), list + ): + name = data.get("name", eval_file.stem) + self.eval_sets.append((name, eval_file)) + except Exception: + pass + + # Find evaluators from evaluators folder + evaluators_dir = self.project_root / "evaluators" + if evaluators_dir.exists(): + for eval_file in evaluators_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Verify it has evaluator-specific fields + if "id" in data and "type" in data: + name = data.get("name", eval_file.stem) + self.evaluators.append((name, eval_file)) + except Exception: + pass diff --git a/src/uipath/_cli/_interactive/_drill_down.py b/src/uipath/_cli/_interactive/_drill_down.py new file mode 100644 index 000000000..a200054b0 --- /dev/null +++ b/src/uipath/_cli/_interactive/_drill_down.py @@ -0,0 +1,92 @@ +"""Drill-down navigation for eval sets and evaluators.""" +# type: ignore + +from typing import TYPE_CHECKING + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class DrillDownMixin: + """Mixin for drill-down navigation operations.""" + + def _drill_down_eval_sets(self: 
"InteractiveEvalCLI") -> None: + """Drill down into eval sets with navigation.""" + if not self.eval_sets: + self._show_no_items_screen("eval sets") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("šŸ“‹ Eval Sets - Navigate & Select") + console.info( + "āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back" + ) + console.info("─" * 65) + + for i, (name, path) in enumerate(self.eval_sets): + if i == current_selection: + console.info(f"ā–ŗ {i + 1}. {name} ā—„") + self._show_eval_set_preview(path) + else: + console.info(f" {i + 1}. {name}") + + key = self._get_key_input() + + if key in ["q", "Q", "back"]: + break + elif key == "up": + current_selection = (current_selection - 1) % len(self.eval_sets) + elif key == "down": + current_selection = (current_selection + 1) % len(self.eval_sets) + elif key in ["enter", " "]: + self._show_eval_set_details(self.eval_sets[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): + current_selection = int(key) - 1 + + def _drill_down_evaluators(self: "InteractiveEvalCLI") -> None: + """Drill down into evaluators with navigation.""" + if not self.evaluators: + self._show_no_items_screen("evaluators") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("āš™ļø Evaluators - Navigate & Select") + console.info( + "āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back" + ) + console.info("─" * 65) + + for i, (name, path) in enumerate(self.evaluators): + if i == current_selection: + console.info(f"ā–ŗ {i + 1}. {name} ā—„") + self._show_evaluator_preview(path) + else: + console.info(f" {i + 1}. {name}") + + key = self._get_key_input() + + if key in ["q", "Q", "back"]: + break + elif key == "up": + current_selection = (current_selection - 1) % len(self.evaluators) + elif key == "down": + current_selection = (current_selection + 1) % len(self.evaluators) + elif key in ["enter", " "]: + self._show_evaluator_details(self.evaluators[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): + current_selection = int(key) - 1 + + def _show_no_items_screen(self: "InteractiveEvalCLI", item_type: str) -> None: + """Show no items screen.""" + self._clear_screen() + console.warning(f"No {item_type} found!") + console.info("Press Enter to go back...") + self._get_input("") diff --git a/src/uipath/_cli/_interactive/_eval_sets.py b/src/uipath/_cli/_interactive/_eval_sets.py new file mode 100644 index 000000000..2ac1da8df --- /dev/null +++ b/src/uipath/_cli/_interactive/_eval_sets.py @@ -0,0 +1,347 @@ +"""Eval set operations for interactive CLI.""" +# type: ignore + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class EvalSetMixin: + """Mixin for eval set operations.""" + + def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: + """Create new evaluation set - simplified version.""" + self._clear_screen() + console.info("āž• Create New Eval Set") + console.info("─" * 65) + + name = self._get_input("Name: ") + if not name: + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Create basic eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": [], + "name": name, + 
"batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "evaluations": [], + } + + # Ask if they want to add evaluations + add_evals = self._get_input("Add evaluations now? (y/n): ").lower() + if add_evals in ["y", "yes"]: + eval_set["evaluations"] = self._add_evaluations_interactive( + str(eval_set["id"]) + ) + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + with open(file_path, "w") as f: + json.dump(eval_set, f, indent=2) + + console.success(f"āœ… Created eval set: {filename}") + self._discover_files() # Refresh + + def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: + """Create new evaluation set with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Eval Set - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Eval Set Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Evaluator References + console.info("\nšŸŽÆ Evaluator References") + console.info("Available evaluators:") + for i, (eval_name, _) in enumerate(self.evaluators, 1): + console.info(f" {i}. {eval_name}") + + evaluator_refs = [] + if self.evaluators: + refs_input = input( + "āž¤ Select evaluators (comma-separated numbers, or 'all'): " + ).strip() + if refs_input.lower() == "all": + evaluator_refs = [ + self._get_evaluator_id(path) for eval_name, path in self.evaluators + ] + elif refs_input: + try: + for num in refs_input.split(","): + idx = int(num.strip()) - 1 + if 0 <= idx < len(self.evaluators): + eval_path = self.evaluators[idx][1] + eval_id = self._get_evaluator_id(eval_path) + evaluator_refs.append(eval_id) + except ValueError: + console.warning("Invalid input, no evaluators selected") + + # Test Cases + console.info("\nšŸ“ Test Cases") + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() + if test_name.lower() == "done": + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info('Examples: {"a": 5, "b": 3} or {"query": "hello world"}') + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation: Dict[str, Any] = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": f"eval-{len(self.eval_sets) 
+ 1}", + "createdAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + } + evaluations.append(evaluation) + test_count += 1 + + if not evaluations: + console.warning("At least one test case is required!") + input("Press Enter to continue...") + return + + # Create eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": evaluator_refs, + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "evaluations": evaluations, + } + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + try: + with open(file_path, "w") as f: + json.dump(eval_set, f, indent=2) + + console.success(f"\nāœ… Created eval set: {filename}") + console.info(f"šŸ“Š Tests: {len(evaluations)}") + console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create eval set: {e}") + + input("\nPress Enter to continue...") + + def _add_evaluations_interactive( + self: "InteractiveEvalCLI", eval_set_id: str + ) -> List[Dict[str, Any]]: + """Add evaluations interactively.""" + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = self._get_input("Test Name (or 'done' to finish): ") + if test_name.lower() == "done": + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info('Examples: {"a": 5, "b": 3} or {"query": "hello world"}') + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation: Dict[str, Any] = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": eval_set_id, + "createdAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + } + evaluations.append(evaluation) + test_count += 1 + + return evaluations + + def _list_eval_sets(self: "InteractiveEvalCLI") -> None: + """List available eval sets.""" + console.info("\nšŸ“‹ Available Eval Sets:") + if not self.eval_sets: + console.warning("No eval sets found") + return + + for i, (name, path) in enumerate(self.eval_sets, 1): + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f"{i}. 
{name}") + console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") + console.info(f" File: {path.name}") + except Exception: + console.info(f"{i}. {name} (error loading)") + + def _show_eval_set_preview(self: "InteractiveEvalCLI", path: Path) -> None: + """Show eval set preview info.""" + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") + except Exception: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_eval_set_details( + self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path] + ) -> None: + """Show detailed eval set view.""" + name, path = eval_set_tuple + self._clear_screen() + console.info(f"šŸ“‹ Eval Set Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"\nšŸ“„ {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") + console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") + console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") + console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") + + evaluator_refs = data.get("evaluatorRefs", []) + if evaluator_refs: + console.info("\nšŸŽÆ Evaluator References:") + for ref in evaluator_refs: + console.info(f" • {ref}") + + evaluations = data.get("evaluations", []) + if evaluations: + console.info("\nšŸ“ Test Cases:") + for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 + test_name = eval_data.get("name", f"Test {i}") + console.info(f" {i}. {test_name}") + if "inputs" in eval_data: + inputs_preview = str(eval_data["inputs"])[:60] + if len(str(eval_data["inputs"])) > 60: + inputs_preview += "..." + console.info(f" Input: {inputs_preview}") + if "expectedOutput" in eval_data: + output_preview = str(eval_data["expectedOutput"])[:60] + if len(str(eval_data["expectedOutput"])) > 60: + output_preview += "..." + console.info(f" Expected: {output_preview}") + + if len(evaluations) > 10: + console.info(f"\n ... 
and {len(evaluations) - 10} more tests") + + except Exception as e: + console.error(f"Error loading eval set: {e}") + + console.info("\nšŸ’” Press Backspace to go back") + self._get_key_input() diff --git a/src/uipath/_cli/_interactive/_evaluators.py b/src/uipath/_cli/_interactive/_evaluators.py new file mode 100644 index 000000000..541a5bbf1 --- /dev/null +++ b/src/uipath/_cli/_interactive/_evaluators.py @@ -0,0 +1,293 @@ +"""Evaluator operations for interactive CLI.""" +# type: ignore + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class EvaluatorMixin: + """Mixin for evaluator operations.""" + + def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: + """Create new evaluator - simplified version.""" + self._clear_screen() + console.info("āž• Create New Evaluator") + console.info("─" * 65) + + name = self._get_input("Name: ") + if not name: + return + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": f"{name} evaluator", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + with open(file_path, "w") as f: + json.dump(evaluator, f, indent=2) + + console.success(f"āœ… Created evaluator: {filename}") + self._discover_files() # Refresh + + def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: + """Create new evaluator with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Evaluator - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Evaluator Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + description = input("āž¤ Description: ").strip() or f"{name} evaluator" + + # Category Selection + console.info("\nšŸ·ļø Category Selection") + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory", + } + + for key, value in categories.items(): + console.info(f" {key}. {value}") + + try: + category = int(input("āž¤ Select Category (0-3): ") or "0") + if category not in categories: + category = 0 + except ValueError: + category = 0 + + # Type Selection + console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") + types = { + 0: "Unknown", + 1: "Exact Match", + 2: "Contains", + 3: "Regex", + 4: "Factuality", + 5: "Custom", + 6: "JSON Similarity", + 7: "Trajectory", + } + + # Show relevant types based on category + relevant_types = [] + if category == 0: # Deterministic + relevant_types = [ + 1, + 2, + 3, + 6, + ] # Exact Match, Contains, Regex, JSON Similarity + elif category == 1: # LLM as Judge + relevant_types = [4, 5] # Factuality, Custom + elif category == 3: # Trajectory + relevant_types = [7] # Trajectory + else: + relevant_types = list(types.keys()) + + for type_id in relevant_types: + console.info(f" {type_id}. 
{types[type_id]}") + + try: + eval_type = int( + input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") + or str(relevant_types[0]) + ) + if eval_type not in relevant_types: + eval_type = relevant_types[0] + except (ValueError, IndexError): + eval_type = 1 + + # Target Output Key + console.info("\nšŸ” Target Configuration") + console.info( + "Target Output Key determines which part of the output to evaluate" + ) + console.info("Examples: '*' (all), 'result', 'answer', 'output'") + target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": description, + "category": category, + "type": eval_type, + "targetOutputKey": target_key, + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + + # LLM Configuration (if LLM as Judge) + if category == 1: # LLM as Judge + console.info("\nšŸ¤– LLM Configuration") + model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" + + console.info("šŸ“ Evaluation Prompt") + console.info("This prompt will be used to evaluate the agent's output") + prompt = input("āž¤ Evaluation Prompt: ").strip() + + if prompt: + evaluator["llmConfig"] = { + "modelName": model_name, + "prompt": prompt, + "temperature": 0.0, + "maxTokens": 1000, + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + try: + with open(file_path, "w") as f: + json.dump(evaluator, f, indent=2) + + console.success(f"\nāœ… Created evaluator: {filename}") + console.info(f"šŸ·ļø Category: {categories[category]}") + console.info(f"šŸŽÆ Type: {types[eval_type]}") + console.info(f"šŸ” Target: {target_key}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create evaluator: {e}") + + input("\nPress Enter to continue...") + + def _list_evaluators(self: "InteractiveEvalCLI") -> None: + """List available evaluators.""" + console.info("\nāš™ļø Available Evaluators:") + if not self.evaluators: + console.warning("No evaluators found") + return + + for i, (name, path) in enumerate(self.evaluators, 1): + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f"{i}. {name}") + console.info(f" Type: {category} | {type_name}") + console.info(f" File: {path.name}") + except Exception: + console.info(f"{i}. 
{name} (error loading)") + + def _show_evaluator_preview(self: "InteractiveEvalCLI", path: Path) -> None: + """Show evaluator preview info.""" + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸŽÆ Type: {category} | {type_name}") + except Exception: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_evaluator_details( + self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path] + ) -> None: + """Show detailed evaluator view.""" + name, path = evaluator_tuple + self._clear_screen() + console.info(f"āš™ļø Evaluator Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"\nšŸ“„ {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“ Description: {data.get('description', 'No description')}") + console.info( + f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}" + ) + console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") + console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") + + if "llmConfig" in data: + llm_config = data["llmConfig"] + console.info("\nšŸ¤– LLM Configuration:") + console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") + if "prompt" in llm_config: + prompt_preview = llm_config["prompt"][:100] + if len(llm_config["prompt"]) > 100: + prompt_preview += "..." + console.info(f" Prompt: {prompt_preview}") + + except Exception as e: + console.error(f"Error loading evaluator: {e}") + + console.info("\nšŸ’” Press Backspace to go back") + self._get_key_input() + + def _get_category_name(self: "InteractiveEvalCLI", category: int) -> str: + """Get category name from number.""" + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory", + } + return categories.get(category, "Unknown") + + def _get_type_name(self: "InteractiveEvalCLI", eval_type: int) -> str: + """Get type name from number.""" + types = { + 0: "Unknown", + 1: "Exact Match", + 2: "Contains", + 3: "Regex", + 4: "Factuality", + 5: "Custom", + 6: "JSON Similarity", + 7: "Trajectory", + } + return types.get(eval_type, "Unknown") + + def _get_evaluator_id(self: "InteractiveEvalCLI", path: Path) -> str: + """Get evaluator ID from file.""" + try: + with open(path) as f: + data = json.load(f) + return data.get("id", path.stem) + except Exception: + return path.stem diff --git a/src/uipath/_cli/_interactive/_execution.py b/src/uipath/_cli/_interactive/_execution.py new file mode 100644 index 000000000..8152fb3a5 --- /dev/null +++ b/src/uipath/_cli/_interactive/_execution.py @@ -0,0 +1,153 @@ +"""Execution utilities for running evaluations.""" +# type: ignore + +import subprocess +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class ExecutionMixin: + """Mixin for execution operations.""" + + def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: + """Execute evaluation with live results.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, + "-m", + 
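+            # Runs the eval command as a child process so its output can be streamed live;
+            # the entrypoint and eval-set paths are passed relative to project_root, which is also used as cwd.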
"uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", + "--workers", + "1", + ] + + console.info( + f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report" + ) + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root, + ) + + # Stream output in real-time + if process.stdout: + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error( + f"\nāŒ Evaluation failed (exit code: {process.returncode})" + ) + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + def _execute_evaluation_no_clear( + self: "InteractiveEvalCLI", eval_path: Path + ) -> None: + """Execute evaluation without clearing screen.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + input("\nPress Enter to continue...") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, + "-m", + "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", + "--workers", + "1", + ] + + console.info( + f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report" + ) + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root, + ) + + # Stream output in real-time + if process.stdout: + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error( + f"\nāŒ Evaluation failed (exit code: {process.returncode})" + ) + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + input("\nPress Enter to continue...") + + def _find_main_py(self: "InteractiveEvalCLI") -> Optional[Path]: + """Find main.py file.""" + # Check current directory + main_py = self.project_root / "main.py" + if main_py.exists(): + return main_py + + # Check parent directories + for parent in self.project_root.parents: + main_py = parent / "main.py" + if main_py.exists(): + return main_py + + return None + + def _confirm(self: "InteractiveEvalCLI", prompt: str) -> bool: + """Ask for confirmation.""" + response = self._get_input(f"{prompt} (y/n): ").lower() + return response in ["y", "yes"] diff --git a/src/uipath/_cli/_interactive/_main.py b/src/uipath/_cli/_interactive/_main.py new file mode 100644 index 000000000..c41f4023a --- /dev/null +++ b/src/uipath/_cli/_interactive/_main.py @@ -0,0 +1,199 @@ +"""Main interactive CLI for evaluations.""" + +from pathlib import Path +from typing import List, Optional, Tuple + +from .._utils._console import ConsoleLogger +from ._discovery import DiscoveryMixin +from ._drill_down import DrillDownMixin +from ._eval_sets import EvalSetMixin +from ._evaluators import EvaluatorMixin +from ._execution import ExecutionMixin +from ._navigation import HAS_NAVIGATION, NavigationMixin + +console = ConsoleLogger() + + +class InteractiveEvalCLI( + NavigationMixin, + DiscoveryMixin, + 
EvalSetMixin, + EvaluatorMixin, + ExecutionMixin, + DrillDownMixin, +): + """Simple, fast, keyboard-driven evaluation CLI.""" + + def __init__(self, project_root: Optional[Path] = None): + self.project_root = project_root or Path.cwd() + self.eval_sets: List[Tuple[str, Path]] = [] + self.evaluators: List[Tuple[str, Path]] = [] + self.current_selection = 0 + self.menu_items = [ + "šŸ“‹ List eval sets", + "āš™ļø List evaluators", + "⚔ Quick run (auto-select)", + "āž• Create eval set", + "āž• Create evaluator", + "šŸŽÆ Run specific combination", + ] + self._discover_files() + + def run(self) -> None: + """Run the interactive CLI.""" + self._show_ascii_art() + + if not HAS_NAVIGATION: + console.warning( + "āš ļø Terminal navigation not available. Using fallback mode." + ) + console.info("Consider using a standard terminal for better experience.\n") + self._run_fallback_mode() + return + + try: + self._run_navigation_mode() + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + + def _run_navigation_mode(self) -> None: + """Run with arrow key navigation.""" + while True: + self._clear_screen() + self._show_ascii_art() + self._show_menu(self.current_selection, self.menu_items) + + key = self._get_key_input() + + if key == "up": + self.current_selection = (self.current_selection - 1) % len( + self.menu_items + ) + elif key == "down": + self.current_selection = (self.current_selection + 1) % len( + self.menu_items + ) + elif key in ["enter", " "]: + self._execute_menu_item_with_navigation(self.current_selection) + elif key.isdigit() and 1 <= int(key) <= 6: + self._execute_menu_item_with_navigation(int(key) - 1) + + def _execute_menu_item_with_navigation(self, index: int) -> None: + """Execute menu item with navigation support.""" + if index == 0: + self._drill_down_eval_sets() + elif index == 1: + self._drill_down_evaluators() + elif index == 2: + self._quick_run_with_navigation() + elif index == 3: + self._create_eval_set_interactive() + elif index == 4: + self._create_evaluator_interactive() + elif index == 5: + self._run_specific_combination() + + def _run_fallback_mode(self) -> None: + """Run without navigation - simple text interface.""" + while True: + console.info("\nāš™ļø Main Menu:") + for i, item in enumerate(self.menu_items, 1): + console.info(f" {i}. {item}") + console.info(" 0. 
Exit") + + try: + choice = input("\nāž¤ Select option: ").strip() + + if choice == "0": + console.info("šŸ‘‹ Goodbye!") + break + elif choice == "1": + self._list_eval_sets_navigation() + elif choice == "2": + self._list_evaluators() + elif choice == "3": + self._quick_run() + elif choice == "4": + self._create_eval_set_simple() + elif choice == "5": + self._create_evaluator_simple() + elif choice == "6": + self._run_specific_combination() + else: + console.warning("Invalid option") + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + + def _quick_run_with_navigation(self) -> None: + """Quick run evaluation with auto-selected eval set.""" + if not self.eval_sets: + self._clear_screen() + console.warning("No eval sets found!") + console.info("Press Enter to go back...") + self._get_input("") + return + + # Use first eval set + eval_name, eval_path = self.eval_sets[0] + + self._clear_screen() + console.info(f"⚔ Quick Run: {eval_name}") + console.info("─" * 65) + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + + def _quick_run(self) -> None: + """Quick run evaluation with auto-selected eval set.""" + if not self.eval_sets: + console.warning("No eval sets found!") + return + + # Use first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"\n⚔ Quick Run: {eval_name}") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _list_eval_sets_navigation(self) -> None: + """List eval sets with navigation.""" + self._clear_screen() + console.info("šŸ“‹ Available Eval Sets") + console.info("─" * 65) + self._list_eval_sets() + input("\nPress Enter to continue...") + + def _run_specific_combination(self) -> None: + """Run specific eval set and evaluator combination.""" + self._clear_screen() + console.info("šŸŽÆ Run Specific Combination") + console.info("─" * 65) + + # Select eval set + console.info("\nšŸ“‹ Select Eval Set:") + for i, (name, _) in enumerate(self.eval_sets, 1): + console.info(f" {i}. 
{name}") + + try: + eval_idx = int(input("\nāž¤ Eval Set Number: ").strip()) - 1 + if not (0 <= eval_idx < len(self.eval_sets)): + console.error("Invalid selection") + input("\nPress Enter to continue...") + return + + eval_name, eval_path = self.eval_sets[eval_idx] + + console.info(f"\nāœ… Selected: {eval_name}") + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + except ValueError: + console.error("Invalid selection") + input("\nPress Enter to continue...") + + +def launch_interactive_cli(project_root: Optional[Path] = None) -> None: + """Launch the interactive CLI.""" + cli = InteractiveEvalCLI(project_root) + cli.run() diff --git a/src/uipath/_cli/_interactive/_navigation.py b/src/uipath/_cli/_interactive/_navigation.py new file mode 100644 index 000000000..4f8077ca0 --- /dev/null +++ b/src/uipath/_cli/_interactive/_navigation.py @@ -0,0 +1,109 @@ +"""Navigation and input handling for interactive CLI.""" + +import sys +import termios +import tty + +from .._utils._console import ConsoleLogger + +console = ConsoleLogger() + + +def has_termios() -> bool: + """Check if we have termios support for advanced input.""" + try: + termios.tcgetattr(sys.stdin) + return True + except Exception: + return False + + +HAS_NAVIGATION = has_termios() + + +class NavigationMixin: + """Mixin for navigation and input handling.""" + + def _clear_screen(self) -> None: + """Clear the screen.""" + print("\033[2J\033[H", end="") + + def _get_input(self, prompt: str) -> str: + """Get input from user.""" + return input(prompt).strip() + + def _get_key_input(self) -> str: + """Get key input with arrow key support.""" + if not HAS_NAVIGATION: + return input("āž¤ ").strip().lower() + + old_settings = termios.tcgetattr(sys.stdin) + try: + tty.setraw(sys.stdin) + + # Read first character + char = sys.stdin.read(1) + + # Check for escape sequences (arrow keys) + if char == "\x1b": # ESC + next_char = sys.stdin.read(1) + if next_char == "[": + arrow = sys.stdin.read(1) + if arrow == "A": + return "up" + elif arrow == "B": + return "down" + return "" + + # Backspace handling + if char == "\x7f": # Backspace (DEL) + return "back" + elif char == "\x08": # Backspace (BS) + return "back" + + # Enter key + if char in ["\r", "\n"]: + return "enter" + + # Digit keys + elif char.isdigit() and 1 <= int(char) <= 6: + return char + elif char == "\x03": # Ctrl+C + raise KeyboardInterrupt + + return "" + except Exception: + return input("āž¤ ").strip().lower() + finally: + # Restore terminal settings + try: + termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) + except Exception: + pass + + def _show_ascii_art(self) -> None: + """Display ASCII art banner.""" + art = """ + ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ + ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• + + Evaluation Builder + Interactive Evaluation Toolkit + """ + console.info(art) + + def _show_menu(self, current_selection: int, menu_items: list[str]) -> None: + """Show 
menu with current selection highlighted.""" + console.info("\nāš™ļø Main Menu:") + console.info("─" * 65) + for i, item in enumerate(menu_items): + if i == current_selection: + console.info(f" ā–¶ {item}") + else: + console.info(f" {item}") + console.info("\nšŸ’” Use ↑/↓ arrows to navigate, Enter to select, or type 1-6") + console.info("Press Ctrl+C to exit") diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index 9e95d0c71..53d55e216 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -13,7 +13,7 @@ class EvalHelpers: @staticmethod def auto_discover_eval_set() -> str: - """Auto-discover evaluation set from evals/eval-sets directory. + """Auto-discover evaluation set from evaluationSets or evals/eval-sets directory. Returns: Path to the evaluation set file @@ -21,19 +21,24 @@ def auto_discover_eval_set() -> str: Raises: ValueError: If no eval set found or multiple eval sets exist """ - eval_sets_dir = Path("evals/eval-sets") + # Try evaluationSets folder first (new structure) + eval_sets_dir = Path("evaluationSets") + + # Fall back to evals/eval-sets (old structure) + if not eval_sets_dir.exists(): + eval_sets_dir = Path("evals/eval-sets") if not eval_sets_dir.exists(): raise ValueError( - "No 'evals/eval-sets' directory found. " - "Please set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'." + "No 'evaluationSets' or 'evals/eval-sets' directory found. " + "Please create an evaluation set or set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'." ) eval_set_files = list(eval_sets_dir.glob("*.json")) if not eval_set_files: raise ValueError( - "No evaluation set files found in 'evals/eval-sets' directory. " + f"No evaluation set files found in '{eval_sets_dir}' directory. " ) if len(eval_set_files) > 1: diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 53dd3bc12..bfdcdbcde 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -31,6 +31,57 @@ console = ConsoleLogger() +def _display_local_results(results_data): + """Display evaluation results locally in a formatted way.""" + if not results_data: + return + + evaluation_set_name = results_data.get("evaluationSetName", "Unknown") + overall_score = results_data.get("score", 0.0) + evaluation_results = results_data.get("evaluationSetResults", []) + + console.info(f"\nšŸŽÆ Evaluation Report: {evaluation_set_name}") + console.info(f"šŸ“Š Overall Score: {overall_score:.1f}%") + console.info("=" * 60) + + passed_count = 0 + total_count = len(evaluation_results) + + for i, test in enumerate(evaluation_results, 1): + test_score = test.get("score", 0.0) + test_name = test.get("evaluationName", f"Test {i}") + + if test_score == 100.0: + status = "āœ… PASS" + passed_count += 1 + elif test_score == 0.0: + status = "āŒ FAIL" + else: + status = "āš ļø PARTIAL" + passed_count += 0.5 # Partial credit + + console.info(f"\n{i}. 
{test_name}: {status} ({test_score:.1f}%)") + + evaluator_results = test.get("evaluationRunResults", []) + for evaluator_result in evaluator_results: + evaluator_name = evaluator_result.get("evaluatorName", "Unknown Evaluator") + result = evaluator_result.get("result", {}) + score = result.get("score", 0.0) + eval_time = result.get("evaluationTime", 0.0) + console.info( + f" └─ {evaluator_name}: {score:.1f}% ({eval_time * 1000:.2f}ms)" + ) + + console.info(f"\nšŸŽÆ Summary: {int(passed_count)}/{total_count} tests passed") + if overall_score == 100.0: + console.success("šŸŽ‰ All tests passed!") + elif overall_score == 0.0: + console.info("šŸ’„ All tests failed!") + else: + console.info(f"⚔ Partial success: {overall_score:.1f}% overall score") + console.info("") + + class LiteralOption(click.Option): def type_cast_value(self, ctx, value): try: @@ -61,6 +112,12 @@ def type_cast_value(self, ctx, value): type=click.Path(exists=False), help="File path where the output will be written", ) +@click.option( + "--interactive", + is_flag=True, + help="Launch streamlined keyboard-only interactive CLI", + default=False, +) @track(when=lambda *_a, **_kw: os.getenv(ENV_JOB_ID) is None) def eval( entrypoint: Optional[str], @@ -69,6 +126,7 @@ def eval( no_report: bool, workers: int, output_file: Optional[str], + interactive: bool, ) -> None: """Run an evaluation set against the agent. @@ -78,7 +136,21 @@ def eval( eval_ids: Optional list of evaluation IDs workers: Number of parallel workers for running evaluations no_report: Do not report the evaluation results + interactive: Launch streamlined keyboard-only interactive CLI """ + # Handle interactive mode + if interactive: + try: + from ._interactive import launch_interactive_cli + + launch_interactive_cli() + return + except ImportError as e: + console.error(f"Interactive mode requires additional dependencies: {e}") + return + except Exception as e: + console.error(f"Failed to launch interactive mode: {e}") + return if not no_report and not os.getenv("UIPATH_FOLDER_KEY"): os.environ["UIPATH_FOLDER_KEY"] = asyncio.run( get_personal_workspace_key_async() @@ -131,16 +203,24 @@ def generate_runtime_context(**context_kwargs) -> UiPathRuntimeContext: if eval_context.job_id: runtime_factory.add_span_exporter(LlmOpsHttpExporter()) + eval_runtime_ref = None + async def execute(): + nonlocal eval_runtime_ref async with UiPathEvalRuntime.from_eval_context( factory=runtime_factory, context=eval_context, event_bus=event_bus, ) as eval_runtime: + eval_runtime_ref = eval_runtime await eval_runtime.execute() await event_bus.wait_for_all(timeout=10) asyncio.run(execute()) + + # Display results locally when --no-report is used + if no_report and eval_runtime_ref and eval_runtime_ref.context.result: + _display_local_results(eval_runtime_ref.context.result.output) except Exception as e: console.error( f"Error: Unexpected error occurred - {str(e)}", include_traceback=True
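Usage sketch for the new interactive mode (assumptions: the package is installed and the command is run from an agent project root containing main.py plus evaluationSets/ and evaluators/ folders). The flag added above is invoked as "uipath eval --interactive"; the same entry point can also be launched directly from Python:

    from pathlib import Path

    from uipath._cli._interactive import launch_interactive_cli

    # Discovers eval sets and evaluators under the given root, shows the arrow-key menu,
    # and falls back to a numbered text menu when the terminal lacks termios support.
    launch_interactive_cli(project_root=Path.cwd())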