diff --git a/.changeset/dsl-aware-mdma-il-prompt.md b/.changeset/dsl-aware-mdma-il-prompt.md
new file mode 100644
index 0000000..e4aa0d9
--- /dev/null
+++ b/.changeset/dsl-aware-mdma-il-prompt.md
@@ -0,0 +1,10 @@
+---
+"@mobile-reality/mdma-prompt-pack": minor
+---
+
+Make the `mobile-reality/mdma-il` author variant DSL-aware. The MDMA-IL model
+reads an MDMA-IL DSL intent, so its system prompt must describe the DSL grammar;
+the previous variant had none. `getAuthorPromptVariant('mobile-reality/mdma-il')`
+now returns the full authoring prompt — DSL input grammar, authoring rules, and
+worked form/table/chart examples — as the single source of truth (previously
+duplicated in the eval harness). The registry label/description are unchanged.
diff --git a/README.md b/README.md
index eddd62a..a0d8b09 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,8 @@
   <a href="https://mobilereality.github.io/mdma/#/docs"><b>📖 Docs</b></a>
   &nbsp;&nbsp;·&nbsp;&nbsp;
   <a href="https://discord.gg/etGSuCuR7B"><b>💬 Discord</b></a>
+  &nbsp;&nbsp;·&nbsp;&nbsp;
+  <a href="https://huggingface.co/MobileReality/mdma-gemma4-26b-dsl-unsloth-v1"><b>🤗 Model</b></a>
 </p>
 
 
@@ -63,6 +65,17 @@ onAction: submit
 ````
 
 
+## Speed comparison
+
+The same scenario, run on two models: GPT-5.5 and our self-hosted MDMA-IL model.
+
+Our model is available on Hugging Face: [MobileReality/mdma-gemma4-26b-dsl-unsloth-v1](https://huggingface.co/MobileReality/mdma-gemma4-26b-dsl-unsloth-v1)
+
+| GPT-5.5 | Our own hosted model |
+| :---: | :---: |
+| <img src="assets/gpt-5.5.gif" alt="GPT-5.5 running the demo scenario" width="100%"> | <img src="assets/own-model.gif" alt="Self-hosted MDMA-IL model running the demo scenario" width="100%"> |
+
+
 ## MDMA_AUTHOR prompt matrix
 
 Each cell shows the pass rate of the model-specialized MDMA_AUTHOR prompt variant on the listed eval suite.
diff --git a/assets/gpt-5.5.gif b/assets/gpt-5.5.gif
new file mode 100644
index 0000000..638ec64
Binary files /dev/null and b/assets/gpt-5.5.gif differ
diff --git a/assets/own-model.gif b/assets/own-model.gif
new file mode 100644
index 0000000..06aaec6
Binary files /dev/null and b/assets/own-model.gif differ
diff --git a/demo/src/AgentChatView.tsx b/demo/src/AgentChatView.tsx
index 658bb0f..c579fcb 100644
--- a/demo/src/AgentChatView.tsx
+++ b/demo/src/AgentChatView.tsx
@@ -1,10 +1,43 @@
-import { useRef, useEffect, useCallback } from 'react';
+import { useRef, useEffect, useCallback, useState } from 'react';
 import { useAgent } from './agent/use-agent.js';
 import { useAgentActionLog } from './agent/use-agent-action-log.js';
 import { AgentMessage } from './agent/AgentMessage.js';
 import { AgentSettings } from './agent/AgentSettings.js';
 import { ChatActionLog } from './chat/ChatActionLog.js';
 import { ChatInput } from './chat/ChatInput.js';
+import type { AssistantTurn, AgentDisplayTurn } from './agent/types.js';
+
+const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));
+
+// Scripted conversation for the auto-play demo (each entry is one user message).
+const DEMO_SCRIPT = [
+  'hi',
+  'generate sample form',
+  'sample chart',
+  'and table',
+  'whats Product Name',
+  'ok could make chart from this table',
+  'line pls',
+];
+
+/**
+ * Plain-text transcript: visible user turns under `You:`, assistant turns under
+ * `Agent:` with their non-empty block payloads; hidden user turns are omitted.
+ */
+function buildRawTranscript(turns: AgentDisplayTurn[]): string {
+  const sections: string[] = [];
+  for (const turn of turns) {
+    if (turn.role !== 'user') {
+      const payloads = (turn as AssistantTurn).blocks
+        .map((block) => (block.type === 'tool_use' ? block.document : block.content))
+        .filter(Boolean);
+      sections.push(`Agent:\n${payloads.join('\n\n')}`);
+    } else if (!turn.hidden) {
+      sections.push(`You:\n${turn.content}`);
+    }
+  }
+  return sections.join('\n\n');
+}
 
 export function AgentChatView() {
   const {
@@ -16,6 +49,7 @@ export function AgentChatView() {
     config,
     updateConfig,
     send,
+    sendText,
     stop,
     clear,
     inputRef,
@@ -23,6 +57,54 @@ export function AgentChatView() {
 
   const { events, isOpen: logOpen, setIsOpen: setLogOpen, clearEvents } = useAgentActionLog(turns);
 
+  const [copiedRaw, setCopiedRaw] = useState(false);
+  const handleCopyRaw = useCallback(async () => {
+    try {
+      await navigator.clipboard.writeText(buildRawTranscript(turns));
+      setCopiedRaw(true);
+      setTimeout(() => setCopiedRaw(false), 1500);
+    } catch {
+      /* clipboard unavailable */
+    }
+  }, [turns]);
+
+  // ── Auto-play demo ──────────────────────────────────────────────────────────
+  // Replays a scripted conversation through the real agent: types each message,
+  // sends it, waits for the full response, then the next. For demo recordings.
+  const [isPlaying, setIsPlaying] = useState(false);
+  const playingRef = useRef(false);
+  const handlePlayDemo = useCallback(async () => {
+    if (playingRef.current) {
+      // already running → stop
+      playingRef.current = false;
+      setIsPlaying(false);
+      return;
+    }
+    playingRef.current = true;
+    setIsPlaying(true);
+    clear();
+    clearEvents();
+    await sleep(500);
+
+    for (const msg of DEMO_SCRIPT) {
+      if (!playingRef.current) break;
+      // Typewriter the message into the input for a natural look.
+      for (let k = 1; k <= msg.length; k++) {
+        if (!playingRef.current) break;
+        setInput(msg.slice(0, k));
+        await sleep(28);
+      }
+      await sleep(350);
+      if (!playingRef.current) break;
+      setInput('');
+      await sendText(msg); // renders the user bubble + awaits the agent's reply
+      await sleep(1100); // beat between turns
+    }
+
+    playingRef.current = false;
+    setIsPlaying(false);
+  }, [clear, clearEvents, sendText, setInput]);
+
   const chatEndRef = useRef<HTMLDivElement>(null);
   const prevCountRef = useRef(turns.length);
 
@@ -72,6 +154,10 @@ export function AgentChatView() {
           isGenerating={isGenerating}
           hasMessages={turns.length > 0}
           inputRef={inputRef}
+          onCopyRaw={handleCopyRaw}
+          copiedRaw={copiedRaw}
+          onPlayDemo={handlePlayDemo}
+          isPlaying={isPlaying}
         />
       </div>
 
diff --git a/demo/src/PreviewView.tsx b/demo/src/PreviewView.tsx
index 496294c..615a9cb 100644
--- a/demo/src/PreviewView.tsx
+++ b/demo/src/PreviewView.tsx
@@ -1,13 +1,14 @@
-import { useRef, useEffect, useCallback, useState } from 'react';
-import { useAgent } from './agent/use-agent.js';
+import { useCallback, useEffect, useRef, useState } from 'react';
 import { AgentMessage } from './agent/AgentMessage.js';
 import { AgentSettings } from './agent/AgentSettings.js';
+import { useAgent } from './agent/use-agent.js';
 import { ChatInput } from './chat/ChatInput.js';
 import { BackendLogDrawer } from './preview/BackendLogPane.js';
 import { PreviewPanel } from './preview/PreviewPanel.js';
 import { clearSubmissionLog } from './preview/insurance-backend.js';
 import { INSURANCE_FLOW_PROMPT } from './preview/insurance-flow-prompt.js';
 import { useInsuranceFlow } from './preview/use-insurance-flow.js';
+import { usePreviewAutoplay } from './preview/use-preview-autoplay.js';
 import { usePreviewValidation } from './preview/use-preview-validation.js';
 
 function countToolUseBlocks(turns: ReturnType<typeof useAgent>['turns']): number {
@@ -29,6 +30,7 @@ export function PreviewView() {
     config,
     updateConfig,
     send,
+    sendText,
     sendHidden,
     stop,
     clear,
@@ -74,6 +76,16 @@ export function PreviewView() {
     insuranceFlow.reset();
   }, [clear, insuranceFlow]);
 
+  // Auto-play the full claim flow hands-free (kickoff message → fill & submit
+  // each step form). Mirrors the scripted demo in the Agent Chat view.
+  const { isPlaying, play } = usePreviewAutoplay({
+    previewState,
+    isGenerating,
+    sendText,
+    setInput,
+    reset: handleClear,
+  });
+
   return (
     <div className="preview-layout">
       <div className="preview-chat">
@@ -115,6 +127,8 @@ export function PreviewView() {
           isGenerating={isGenerating}
           hasMessages={turns.length > 0}
           inputRef={inputRef}
+          onPlayDemo={play}
+          isPlaying={isPlaying}
         />
       </div>
 
diff --git a/demo/src/agent/AgentMessage.tsx b/demo/src/agent/AgentMessage.tsx
index 422f8d9..7432ef7 100644
--- a/demo/src/agent/AgentMessage.tsx
+++ b/demo/src/agent/AgentMessage.tsx
@@ -1,8 +1,9 @@
-import { memo } from 'react';
+import { memo, useState } from 'react';
 import type { ReactNode } from 'react';
 import { MdmaDocument } from '@mobile-reality/mdma-renderer-react';
 import { customizations } from '../custom-components.js';
 import type {
+  AgentBlock,
   AgentDisplayTurn,
   AssistantTurn,
   ThinkingBlock,
@@ -10,6 +11,15 @@ import type {
   ToolUseBlock,
 } from './types.js';
 
+// Debug view of an assistant turn: each block's raw payload (a tool_use block's
+// document, otherwise its content), empties dropped, separated by blank lines.
+function buildRawDump(blocks: AgentBlock[]): string {
+  const payloads = blocks.map((block) =>
+    block.type === 'tool_use' ? block.document : block.content,
+  );
+  return payloads.filter(Boolean).join('\n\n');
+}
+
 // ── Inline markdown renderer ──────────────────────────────────────────────────
 
 function parseInline(text: string): ReactNode {
@@ -270,6 +280,8 @@ export const AgentMessage = memo(function AgentMessage({
   activeToolUseId,
   onSelectToolUse,
 }: AgentMessageProps) {
+  const [showRaw, setShowRaw] = useState(false);
+
   if (turn.role === 'user') {
     if (turn.hidden) return null;
     return (
@@ -286,32 +298,53 @@ export const AgentMessage = memo(function AgentMessage({
 
   const { blocks } = turn as AssistantTurn;
 
+  const hasContent = blocks.some(
+    (b) => (b.type === 'text' || b.type === 'thinking' ? b.content : (b as ToolUseBlock).document),
+  );
+
   return (
     <div className="chat-msg chat-msg--assistant">
       <div className="chat-msg-header">
         <span className="chat-msg-label">Agent</span>
-      </div>
-      <div className="chat-msg-body agent-blocks">
-        {blocks.length === 0 ? (
-          <span className="chat-msg-typing">Starting…</span>
-        ) : (
-          blocks.map((block) => {
-            if (block.type === 'thinking')
-              return <ThinkingBlockView key={block.id} block={block} />;
-            if (block.type === 'text') return <TextBlockView key={block.id} block={block} />;
-            if (block.type === 'tool_use')
-              return (
-                <ToolUseBlockView
-                  key={block.id}
-                  block={block}
-                  compact={compactToolUse}
-                  isActive={activeToolUseId === block.id}
-                  onSelect={onSelectToolUse ? () => onSelectToolUse(block.id) : undefined}
-                />
-              );
-          })
+        {hasContent && (
+          <button
+            type="button"
+            className="agent-raw-toggle"
+            data-active={showRaw ? 'true' : undefined}
+            onClick={() => setShowRaw((v) => !v)}
+            title="Show the raw model output (text + generate_mdma document) for debugging"
+          >
+            {showRaw ? 'Hide raw' : 'Raw'}
+          </button>
         )}
       </div>
+      {showRaw ? (
+        <div className="chat-msg-body agent-blocks">
+          <pre className="agent-raw">{buildRawDump(blocks)}</pre>
+        </div>
+      ) : (
+        <div className="chat-msg-body agent-blocks">
+          {blocks.length === 0 ? (
+            <span className="chat-msg-typing">Starting…</span>
+          ) : (
+            blocks.map((block) => {
+              if (block.type === 'thinking')
+                return <ThinkingBlockView key={block.id} block={block} />;
+              if (block.type === 'text') return <TextBlockView key={block.id} block={block} />;
+              if (block.type === 'tool_use')
+                return (
+                  <ToolUseBlockView
+                    key={block.id}
+                    block={block}
+                    compact={compactToolUse}
+                    isActive={activeToolUseId === block.id}
+                    onSelect={onSelectToolUse ? () => onSelectToolUse(block.id) : undefined}
+                  />
+                );
+            })
+          )}
+        </div>
+      )}
     </div>
   );
 });
diff --git a/demo/src/agent/AgentSettings.tsx b/demo/src/agent/AgentSettings.tsx
index bc1cb8f..06ff712 100644
--- a/demo/src/agent/AgentSettings.tsx
+++ b/demo/src/agent/AgentSettings.tsx
@@ -3,6 +3,8 @@ import { AUTHOR_PROMPT_VARIANTS } from '@mobile-reality/mdma-prompt-pack';
 import { getDefaultPromptVariantForModel } from '../model-prompt-variant.js';
 import type { AnthropicConfig } from './anthropic-client.js';
 
+const OWN_MODEL_URL_PLACEHOLDER = 'https://your-deployment.example.com/v1';
+
 const PROVIDER_MODELS: Record<string, Array<{ value: string; label: string }>> = {
   anthropic: [
     { value: 'claude-opus-4-7', label: 'claude-opus-4.7' },
@@ -38,12 +40,21 @@ const PROVIDER_MODELS: Record<string, Array<{ value: string; label: string }>> =
     { value: 'x-ai/grok-4.20', label: 'x-ai/grok-4.20' },
     { value: 'x-ai/grok-4.3', label: 'x-ai/grok-4.3' },
   ],
+  'own-model': [{ value: 'mdma-26b', label: 'mdma-26b (our MDMA model)' }],
+};
+
+const PROVIDER_LABELS: Record<string, string> = {
+  anthropic: 'anthropic',
+  openai: 'openai',
+  openrouter: 'openrouter',
+  'own-model': 'own MDMA model',
 };
 
 const DEFAULT_MODELS: Record<string, string> = {
   anthropic: 'claude-sonnet-4-6',
   openai: 'gpt-5.5',
   openrouter: 'google/gemini-3.1-pro-preview',
+  'own-model': 'mdma-26b',
 };
 
 const API_KEY_LABELS: Record<string, string> = {
@@ -80,7 +91,9 @@ export const AgentSettings = memo(function AgentSettings({ config, onUpdate }: A
 
   const provider = config.provider ?? 'anthropic';
   const models = PROVIDER_MODELS[provider] ?? [];
-  const missingKey = !getApiKey(config, provider);
+  // Our own endpoint has auth off, so it never needs a key.
+  const needsKey = provider !== 'own-model';
+  const missingKey = needsKey && !getApiKey(config, provider);
 
   function switchProvider(next: NonNullable<AnthropicConfig['provider']>) {
     if (next === provider) return;
@@ -115,21 +128,35 @@ export const AgentSettings = memo(function AgentSettings({ config, onUpdate }: A
                   className={`ai-preset-btn ${provider === p ? 'ai-preset-btn--active' : ''}`}
                   onClick={() => switchProvider(p)}
                 >
-                  {p}
+                  {PROVIDER_LABELS[p] ?? p}
                 </button>
               ),
             )}
           </div>
           <div className="chat-settings-fields">
-            <label className="ai-setting">
-              <span>{API_KEY_LABELS[provider]}</span>
-              <input
-                type="password"
-                value={getApiKey(config, provider)}
-                onChange={(e) => onUpdate(apiKeyPatch(provider, e.target.value))}
-                placeholder={API_KEY_PLACEHOLDERS[provider]}
-              />
-            </label>
+            {needsKey ? (
+              <label className="ai-setting">
+                <span>{API_KEY_LABELS[provider]}</span>
+                <input
+                  type="password"
+                  value={getApiKey(config, provider)}
+                  onChange={(e) => onUpdate(apiKeyPatch(provider, e.target.value))}
+                  placeholder={API_KEY_PLACEHOLDERS[provider]}
+                />
+              </label>
+            ) : (
+              <label className="ai-setting">
+                <span>Model endpoint URL</span>
+                <input
+                  type="text"
+                  value={config.ownModelBaseUrl ?? ''}
+                  onChange={(e) => onUpdate({ ownModelBaseUrl: e.target.value })}
+                  placeholder={OWN_MODEL_URL_PLACEHOLDER}
+                  spellCheck={false}
+                  autoComplete="off"
+                />
+              </label>
+            )}
             <div className="ai-setting">
               <span>Model</span>
               <select
@@ -162,26 +189,42 @@ export const AgentSettings = memo(function AgentSettings({ config, onUpdate }: A
                 />
               </label>
             )}
-            <label className="ai-setting">
+            <div className="ai-setting">
               <span>System prompt variant</span>
-              <select
-                aria-label="System prompt variant"
-                value={config.systemPromptId ?? AUTHOR_PROMPT_VARIANTS[0].id}
-                onChange={(e) => onUpdate({ systemPromptId: e.target.value })}
-                title={
-                  AUTHOR_PROMPT_VARIANTS.find(
-                    (v) => v.id === (config.systemPromptId ?? AUTHOR_PROMPT_VARIANTS[0].id),
-                  )?.description
-                }
-              >
-                {AUTHOR_PROMPT_VARIANTS.map((v) => (
-                  <option key={v.id} value={v.id} title={v.description}>
-                    {v.label}
-                  </option>
-                ))}
-              </select>
-            </label>
+              {provider === 'own-model' ? (
+                // The own MDMA model uses its own fixed Gemma-aligned prompt, so
+                // the variant is locked.
+                <select aria-label="System prompt variant" value="mdma-dsl" disabled>
+                  <option value="mdma-dsl">MDMA DSL Variant</option>
+                </select>
+              ) : (
+                <select
+                  aria-label="System prompt variant"
+                  value={config.systemPromptId ?? AUTHOR_PROMPT_VARIANTS[0].id}
+                  onChange={(e) => onUpdate({ systemPromptId: e.target.value })}
+                  title={
+                    AUTHOR_PROMPT_VARIANTS.find(
+                      (v) => v.id === (config.systemPromptId ?? AUTHOR_PROMPT_VARIANTS[0].id),
+                    )?.description
+                  }
+                >
+                  {AUTHOR_PROMPT_VARIANTS.map((v) => (
+                    <option key={v.id} value={v.id} title={v.description}>
+                      {v.label}
+                    </option>
+                  ))}
+                </select>
+              )}
+            </div>
           </div>
+          {provider === 'own-model' && (
+            <p className="agent-settings-note">
+              The entire agent runs on your self-hosted <strong>MDMA model</strong> endpoint
+              (OpenAI-compatible, tool-calling enabled) — no third-party model is called. Enter the
+              deployed model URL above; leave it blank to use the default. The{' '}
+              <code>/v1</code> suffix is added automatically.
+            </p>
+          )}
           <p className="agent-settings-note agent-settings-note--storage">
             🔒 Your API key is stored in your browser&apos;s localStorage only. It is never sent to
             any server other than the AI provider you select.
diff --git a/demo/src/agent/anthropic-client.ts b/demo/src/agent/anthropic-client.ts
index c5bcc39..e8ec1cf 100644
--- a/demo/src/agent/anthropic-client.ts
+++ b/demo/src/agent/anthropic-client.ts
@@ -6,19 +6,38 @@
  */
 
 export interface AnthropicConfig {
-  provider?: 'anthropic' | 'openai' | 'openrouter';
+  /**
+   * 'own-model' routes the entire agent loop to our self-hosted mdma-26b
+   * endpoint (OpenAI-compatible, tool-calling enabled) — no third-party model.
+   */
+  provider?: 'anthropic' | 'openai' | 'openrouter' | 'own-model';
   /** Anthropic API key */
   apiKey: string;
   /** OpenAI API key — stored separately so switching providers doesn't clear it */
   openaiApiKey?: string;
   /** OpenRouter API key */
   openrouterApiKey?: string;
+  /**
+   * Base URL of the self-hosted MDMA model endpoint (OpenAI-compatible).
+   * Only used when provider is 'own-model'; falls back to
+   * OWN_MODEL_DEFAULT_BASE_URL when empty.
+   */
+  ownModelBaseUrl?: string;
   model: string;
   /** Token budget for extended thinking. Only used when provider is 'anthropic'. */
   thinkingBudget?: number;
   systemPromptId?: string;
 }
 
+/**
+ * Default endpoint for the self-hosted MDMA model, used when the user hasn't
+ * entered their own URL in Agent Settings. Overridable at build time via
+ * VITE_OWN_MODEL_BASE_URL; an unset or empty value falls back to the built-in URL.
+ */
+export const OWN_MODEL_DEFAULT_BASE_URL =
+  import.meta.env.VITE_OWN_MODEL_BASE_URL ||
+  'https://REDACTED.modal.run/v1';
+
 export interface ToolDefinition {
   name: string;
   description: string;
diff --git a/demo/src/agent/openai-agent-client.ts b/demo/src/agent/openai-agent-client.ts
index 6469449..5dbf5c9 100644
--- a/demo/src/agent/openai-agent-client.ts
+++ b/demo/src/agent/openai-agent-client.ts
@@ -25,10 +25,110 @@ export interface OpenAIToolCall {
 
 export type OpenAIMessage = OpenAIUserMessage | OpenAIAssistantMessage | OpenAIToolMessage;
 
-// text block always lives at index 0; tool calls at 1, 2, …
+// reasoning block lives at -1 (rendered first), text at 0, tool calls at 1, 2, …
+const REASONING_IDX = -1;
 const TEXT_IDX = 0;
 const TOOL_IDX_OFFSET = 1;
 
+// Safety limits so a stalled or runaway stream can never hang the UI. Our
+// self-hosted endpoint can emit an unbounded `delta.reasoning` channel (the
+// model's chain-of-thought); without these a non-terminating stream leaves the
+// agent loop awaiting forever and `isGenerating` stuck true.
+const IDLE_TIMEOUT_MS = 60_000; // no chunk for this long → assume the stream died
+const MAX_STREAM_MS = 240_000; // hard wall-clock ceiling for one response
+const MAX_STREAM_BYTES = 4_000_000; // ~4 MB of SSE text → runaway guard
+
+// Loop detector. Gemma 4's known repetition collapse (see
+// evals/own-model/repetition-loops.md) degrades a thinking block into a short
+// token/phrase flooding the budget. `min_p` + `repetition_penalty` cut most of
+// it, but the collapse is an unfixed model trait, so we keep a cheap safety
+// net: over a sliding window of recent words, a healthy stream is lexically
+// diverse; a degenerate loop (one token, or a cycle like
+// `(END) (DONE) (STOP) (FINAL) …`) collapses unique/total. Below the floor we
+// abort rather than let it eat the whole generation.
+//
+// We run this on BOTH channels. The collapse usually lives in `reasoning`, but
+// it can also leak onto `content`: the model emits a valid document, then keeps
+// going with a raw "Thinking Process:" ramble after the reasoning span has
+// already closed. A legit MDMA document is well above the diversity floor, so
+// guarding content does not false-positive on real output.
+const LOOP_WINDOW_WORDS = 160; // sliding window of recent words
+const LOOP_MIN_WORDS = 120; // don't judge until we have enough signal
+const LOOP_UNIQUE_RATIO = 0.15; // unique/total below this → degenerate loop
+
+/** Sliding window of recent words on one channel; flags a degenerate repetition loop. */
+class RepetitionLoopDetector {
+  private readonly words: string[] = [];
+
+  /** Adds the delta's words to the window; true once unique/total drops below the floor. */
+  push(text: string): boolean {
+    const recent = this.words;
+    text.split(/\s+/).forEach((token) => {
+      if (token.length === 0) return;
+      recent.push(token);
+      if (recent.length > LOOP_WINDOW_WORDS) recent.shift();
+    });
+    if (recent.length < LOOP_MIN_WORDS) return false;
+    return new Set(recent).size / recent.length < LOOP_UNIQUE_RATIO;
+  }
+}
+
+const MDMA_FENCE_OPEN = '```mdma';
+const MDMA_FENCE_CLOSE = '```';
+
+// Strips leaked ```mdma fenced documents from the assistant's chat (content)
+// channel. The real document arrives via the generate_mdma tool call and renders
+// in the preview pane, but the model sometimes ALSO transcribes a copy into chat
+// (most often on the first turn); the chat UI is prose-only, so those blocks are
+// dropped. Works on the live stream: complete lines are classified as they
+// arrive, and the trailing partial line is held back only while it is, or could
+// still become, a ```mdma opener — so normal prose keeps streaming smoothly.
+class MdmaFenceStripper {
+  private buf = ''; // text after the last emitted char (start-aligned to current line)
+  private inFence = false; // true between a ```mdma opener line and its closing ``` line
+  private partialEmitted = 0; // chars of the current unterminated line already emitted
+
+  /** Feed a content delta; returns only the text that is safe to show in chat. */
+  push(text: string): string {
+    this.buf += text;
+    let out = '';
+    for (let nl = this.buf.indexOf('\n'); nl !== -1; nl = this.buf.indexOf('\n')) {
+      const line = this.buf.slice(0, nl + 1);
+      this.buf = this.buf.slice(nl + 1);
+      const trimmed = line.trim();
+      if (this.inFence) {
+        if (trimmed === MDMA_FENCE_CLOSE) this.inFence = false;
+      } else if (trimmed.startsWith(MDMA_FENCE_OPEN)) {
+        this.inFence = true;
+      } else {
+        out += line.slice(this.partialEmitted);
+      }
+      this.partialEmitted = 0;
+    }
+    // Trailing partial line: hold it if it is an opener (or a prefix of one), like full lines.
+    if (!this.inFence && this.buf.length > this.partialEmitted) {
+      const t = this.buf.trim();
+      const couldOpenFence =
+        t.length > 0 && (MDMA_FENCE_OPEN.startsWith(t) || t.startsWith(MDMA_FENCE_OPEN));
+      if (!couldOpenFence) {
+        out += this.buf.slice(this.partialEmitted);
+        this.partialEmitted = this.buf.length;
+      }
+    }
+    return out;
+  }
+
+  /** Emit any leftover at stream end; drops an unclosed fence or a newline-less opener. */
+  flush(): string {
+    const drop = this.inFence || this.buf.trim().startsWith(MDMA_FENCE_OPEN);
+    const out = drop ? '' : this.buf.slice(this.partialEmitted);
+    this.buf = '';
+    this.partialEmitted = 0;
+    this.inFence = false;
+    return out;
+  }
+}
+
 export async function* streamOpenAIAgentMessages(
   apiKey: string,
   model: string,
@@ -37,6 +137,8 @@ export async function* streamOpenAIAgentMessages(
   tools: ToolDefinition[],
   signal?: AbortSignal,
   baseUrl = 'https://api.openai.com/v1',
+  /** Extra request-body fields merged in (e.g. temperature, chat_template_kwargs). */
+  extraBody?: Record<string, unknown>,
 ): AsyncGenerator<AgentStreamEvent> {
   const openAITools = tools.map((t) => ({
     type: 'function' as const,
@@ -57,6 +159,7 @@ export async function* streamOpenAIAgentMessages(
         messages: [{ role: 'system', content: systemPrompt }, ...messages],
         tools: openAITools,
         tool_choice: 'auto',
+        ...extraBody,
       }),
       signal,
     });
@@ -82,12 +185,47 @@ export async function* streamOpenAIAgentMessages(
   let buf = '';
   let finishReason = 'stop';
   const startedBlocks = new Set<number>();
+  const startedAt = Date.now();
+  let totalBytes = 0;
+  const reasoningLoopDetector = new RepetitionLoopDetector();
+  const contentLoopDetector = new RepetitionLoopDetector();
+  const fenceStripper = new MdmaFenceStripper();
 
   try {
     while (true) {
-      const { done, value } = await reader.read();
+      // Race the read against an idle timer so a stalled stream can't hang.
+      const readPromise = reader.read();
+      readPromise.catch(() => {}); // swallow rejection if we cancel below
+      let idleTimer: ReturnType<typeof setTimeout> | undefined;
+      const idle = new Promise<'idle'>((resolve) => {
+        idleTimer = setTimeout(() => resolve('idle'), IDLE_TIMEOUT_MS);
+      });
+      const result = await Promise.race([readPromise, idle]);
+      clearTimeout(idleTimer);
+
+      if (result === 'idle') {
+        reader.cancel().catch(() => {});
+        yield {
+          type: 'stream_error',
+          message: `Stream stalled — no data for ${IDLE_TIMEOUT_MS / 1000}s. The model may be stuck; please try again.`,
+        };
+        return;
+      }
+
+      const { done, value } = result;
       if (done) break;
 
+      totalBytes += value?.byteLength ?? 0;
+      if (totalBytes > MAX_STREAM_BYTES || Date.now() - startedAt > MAX_STREAM_MS) {
+        reader.cancel().catch(() => {});
+        yield {
+          type: 'stream_error',
+          message:
+            'Stream exceeded safety limits (likely a runaway generation) and was stopped. Please try again.',
+        };
+        return;
+      }
+
       buf += decoder.decode(value, { stream: true });
       const lines = buf.split('\n');
       buf = lines.pop() ?? '';
@@ -113,12 +251,49 @@ export async function* streamOpenAIAgentMessages(
         if (finish) finishReason = finish;
         if (!delta) continue;
 
+        // Our model streams its chain-of-thought on a separate `reasoning`
+        // channel (OpenAI-compatible servers like vLLM expose it here). Render
+        // it as a collapsible thinking block instead of dropping it on the floor.
+        if (typeof delta.reasoning === 'string' && delta.reasoning) {
+          if (!startedBlocks.has(REASONING_IDX)) {
+            startedBlocks.add(REASONING_IDX);
+            yield { type: 'block_start', index: REASONING_IDX, blockType: 'thinking' };
+          }
+          yield { type: 'thinking_delta', index: REASONING_IDX, thinking: delta.reasoning };
+
+          if (reasoningLoopDetector.push(delta.reasoning)) {
+            reader.cancel().catch(() => {});
+            yield {
+              type: 'stream_error',
+              message:
+                'The model got stuck repeating itself while thinking and was stopped. Please try again.',
+            };
+            return;
+          }
+        }
+
         if (typeof delta.content === 'string' && delta.content) {
-          if (!startedBlocks.has(TEXT_IDX)) {
-            startedBlocks.add(TEXT_IDX);
-            yield { type: 'block_start', index: TEXT_IDX, blockType: 'text' };
+          // Strip any leaked ```mdma document; only prose reaches the chat UI.
+          const visible = fenceStripper.push(delta.content);
+          if (visible) {
+            if (!startedBlocks.has(TEXT_IDX)) {
+              startedBlocks.add(TEXT_IDX);
+              yield { type: 'block_start', index: TEXT_IDX, blockType: 'text' };
+            }
+            yield { type: 'text_delta', index: TEXT_IDX, text: visible };
+          }
+
+          // Feed the loop detector the RAW content so a runaway fenced block
+          // still trips it even though we never display the fence.
+          if (contentLoopDetector.push(delta.content)) {
+            reader.cancel().catch(() => {});
+            yield {
+              type: 'stream_error',
+              message:
+                'The model got stuck repeating itself and was stopped. Please try again.',
+            };
+            return;
           }
-          yield { type: 'text_delta', index: TEXT_IDX, text: delta.content };
         }
 
         const toolCalls = delta.tool_calls as Array<Record<string, unknown>> | undefined;
@@ -151,9 +326,26 @@ export async function* streamOpenAIAgentMessages(
       }
     }
   } finally {
-    reader.releaseLock();
+    // releaseLock throws if we already cancelled the reader — ignore that.
+    try {
+      reader.releaseLock();
+    } catch {
+      /* reader already released via cancel() */
+    }
+  }
+
+  // Emit any prose the stripper was holding back (e.g. a final line with no
+  // trailing newline that turned out not to be a fence).
+  const tail = fenceStripper.flush();
+  if (tail) {
+    if (!startedBlocks.has(TEXT_IDX)) {
+      startedBlocks.add(TEXT_IDX);
+      yield { type: 'block_start', index: TEXT_IDX, blockType: 'text' };
+    }
+    yield { type: 'text_delta', index: TEXT_IDX, text: tail };
   }
 
+  if (startedBlocks.has(REASONING_IDX)) yield { type: 'block_stop', index: REASONING_IDX };
   if (startedBlocks.has(TEXT_IDX)) yield { type: 'block_stop', index: TEXT_IDX };
   for (const tcIdx of Array.from(startedBlocks).filter((i) => i >= TOOL_IDX_OFFSET)) {
     yield { type: 'block_stop', index: tcIdx };
diff --git a/demo/src/agent/use-agent.ts b/demo/src/agent/use-agent.ts
index cfaf730..3362a33 100644
--- a/demo/src/agent/use-agent.ts
+++ b/demo/src/agent/use-agent.ts
@@ -2,11 +2,16 @@ import { useState, useCallback, useRef, useEffect } from 'react';
 import type { Dispatch, SetStateAction } from 'react';
 import {
   buildSystemPrompt,
+  buildFixerPrompt,
+  buildFixerMessage,
   getAuthorPromptVariant,
   getAgentToolPromptVariant,
+  MDMA_IL_AGENT_SYSTEM_PROMPT,
 } from '@mobile-reality/mdma-prompt-pack';
+import { validate } from '@mobile-reality/mdma-validator';
 import {
   streamAgentMessages,
+  OWN_MODEL_DEFAULT_BASE_URL,
   type AnthropicConfig,
   type ApiMessage,
   type ApiAssistantBlock,
@@ -65,6 +70,40 @@ const GENERATE_MDMA_TOOL_BRIEF = {
   },
 };
 
+// ── Own-model (mdma-26b) endpoint ─────────────────────────────────────────────
+// Our self-hosted model, served OpenAI-compatible with tool-calling enabled.
+// In "own-model" provider mode the WHOLE agent loop runs here (conversation +
+// generate_mdma via tool_choice:auto), so no third-party model is called.
+// Auth is off (placeholder key); enable_thinking must be false; temperature 1
+// for agentic/conversational use.
+const OWN_MODEL_NAME = import.meta.env.VITE_OWN_MODEL_NAME ?? 'mdma-26b';
+
+// Normalise the own-model endpoint typed into Agent Settings: strip surrounding
+// whitespace and trailing slashes, then make sure it ends with the
+// OpenAI-compatible `/v1` suffix. Blank input → the build-time default URL.
+function normalizeOwnModelBaseUrl(raw?: string): string {
+  const trimmed = (raw ?? '').trim().replace(/\/+$/, '');
+  if (trimmed === '') return OWN_MODEL_DEFAULT_BASE_URL;
+  return trimmed.endsWith('/v1') ? trimmed : trimmed + '/v1';
+}
+
+// Extra OpenAI-request body our endpoint needs (merged in by the OpenAI client).
+// `max_tokens` bounds the response server-side so a runaway reasoning channel
+// can't generate forever (the client also caps the stream defensively).
+//
+// `min_p` + `repetition_penalty` cut the degenerate reasoning repetition loop
+// (word-doubling → token-doubling → single-token flooding) that is a known
+// Gemma 4 trait — see evals/own-model/repetition-loops.md. `min_p` is the
+// primary tail-cutter; `repetition_penalty` starts low (raise only if needed —
+// too high hurts valid output). DRY would be ideal but vLLM doesn't support it.
+const OWN_MODEL_EXTRA_BODY = {
+  temperature: 1,
+  max_tokens: 8192,
+  min_p: 0.02,
+  repetition_penalty: 1.1,
+  chat_template_kwargs: { enable_thinking: false },
+} as const;
+
 // ── Config persistence ───────────────────────────────────────────────────────
 
 const CONFIG_KEY = 'mdma-agent-config';
@@ -202,8 +241,7 @@ async function callAuthorOpenAI(
   brief: string,
   signal: AbortSignal,
 ): Promise<string> {
-  const provider = config.provider ?? 'openai';
-  const baseUrl = OPENAI_COMPAT_BASE_URLS[provider] ?? OPENAI_COMPAT_BASE_URLS.openai!;
+  const baseUrl = getBaseUrlForProvider(config);
   const apiKey = getApiKeyForProvider(config);
   const response = await fetch(`${baseUrl}/chat/completions`, {
     method: 'POST',
@@ -387,6 +425,7 @@ async function runAgentLoop(
             if (meta.apiBlock.type === 'tool_use') meta.apiBlock.input = { document };
           }
 
+          document = await maybeFixDocument(config, document, signal);
           const parsed = await parseMarkdown(document).catch(() => null);
           const ast = parsed?.ast ?? null;
           const store = parsed?.store ?? null;
@@ -434,17 +473,103 @@ const OPENAI_COMPAT_BASE_URLS: Partial<Record<NonNullable<AnthropicConfig['provi
   openrouter: 'https://openrouter.ai/api/v1',
 };
 
+/**
+ * Pick the OpenAI-compatible base URL for `config.provider` (default 'openai'):
+ * 'own-model' → normalised `config.ownModelBaseUrl`; else the static map.
+ */
+function getBaseUrlForProvider(config: AnthropicConfig): string {
+  const providerId = config.provider ?? 'openai';
+  return providerId === 'own-model'
+    ? normalizeOwnModelBaseUrl(config.ownModelBaseUrl)
+    : (OPENAI_COMPAT_BASE_URLS[providerId] ?? OPENAI_COMPAT_BASE_URLS.openai!);
+}
+
 function getApiKeyForProvider(config: AnthropicConfig): string {
   switch (config.provider) {
     case 'openai':
       return config.openaiApiKey ?? '';
     case 'openrouter':
       return config.openrouterApiKey ?? '';
+    case 'own-model':
+      return 'unused'; // auth is off on our endpoint; client just needs a non-empty key
     default:
       return config.apiKey;
   }
 }
 
+// ── Self-healing fixer ────────────────────────────────────────────────────────
+// When generate_mdma returns an invalid document, repair it before render: first
+// the validator's deterministic auto-fixes (free), then — for anything left — an
+// LLM fixer pass (same provider/model) using the canonical fixer prompt. The
+// model's varied slips (HTML-tag thinking, JSON-in-fence, wrong field keys) need
+// the LLM; a regex repair can't cover them.
+
+/** One-shot, non-streaming completion for the configured provider (system + user → reply text). */
+async function chatOnce(
+  config: AnthropicConfig,
+  system: string,
+  user: string,
+  signal: AbortSignal,
+): Promise<string> {
+  if ((config.provider ?? 'anthropic') === 'anthropic') return callAuthorAnthropic(config, system, user, signal);
+  const ownModel = config.provider === 'own-model';
+  const ownModelParams = ownModel ? { max_tokens: 4096, chat_template_kwargs: { enable_thinking: false } } : {};
+  const url = `${getBaseUrlForProvider(config)}/chat/completions`;
+  const headers = {
+    'content-type': 'application/json',
+    authorization: `Bearer ${getApiKeyForProvider(config)}`,
+  };
+  const payload = {
+    model: ownModel ? OWN_MODEL_NAME : config.model,
+    messages: [
+      { role: 'system', content: system },
+      { role: 'user', content: user },
+    ],
+    temperature: 0, // strict, deterministic repair
+    ...ownModelParams,
+  };
+  const response = await fetch(url, { method: 'POST', headers, body: JSON.stringify(payload), signal });
+  if (!response.ok) throw new Error(`Fixer call failed (${response.status})`);
+  type Completion = { choices?: Array<{ message?: { content?: string } }> };
+  const json = (await response.json()) as Completion;
+  return json.choices?.[0]?.message?.content ?? '';
+}
+
+/**
+ * Return a valid (or best-effort repaired) MDMA document: validator auto-fixes
+ * first, then one LLM fixer pass. No LLM call when auto-fix already suffices.
+ */
+async function maybeFixDocument(
+  config: AnthropicConfig,
+  document: string,
+  signal: AbortSignal,
+): Promise<string> {
+  if (!document.trim()) return document;
+  // 1. Deterministic auto-fix.
+  const r = validate(document, { exclude: ['thinking-block'], autoFix: true });
+  if (r.ok) return r.output;
+  // 2. LLM fixer for the unfixed errors/warnings that remain.
+  const pending = (issues: typeof r.issues) =>
+    issues.filter((i) => !i.fixed && (i.severity === 'error' || i.severity === 'warning'));
+  const unfixed = pending(r.issues);
+  if (unfixed.length === 0) return r.output;
+
+  try {
+    const system = `${buildSystemPrompt()}\n\n---\n\n${buildFixerPrompt('single-block')}`;
+    const fixed = await chatOnce(config, system, buildFixerMessage(document, unfixed, {}), signal);
+    if (fixed.trim()) {
+      // Accept only a real improvement: valid, fewer errors, or equal errors with
+      // fewer unfixed issues — never an equally broken or blank rewrite.
+      const after = validate(fixed, { exclude: ['thinking-block'], autoFix: true });
+      const errorDelta = after.summary.errors - r.summary.errors;
+      const fewerIssues = pending(after.issues).length < unfixed.length;
+      if (after.ok || errorDelta < 0 || (errorDelta === 0 && fewerIssues)) return after.output;
+    }
+  } catch {
+    /* fixer failed — fall back to the deterministic best-effort below */
+  }
+  return r.output;
+}
+
 async function runOpenAIAgentLoop(
   config: AnthropicConfig,
   systemPrompt: string,
@@ -456,9 +581,11 @@ async function runOpenAIAgentLoop(
   nextId: () => string,
   subAgent: AuthorSubAgent | null,
 ): Promise<void> {
-  const baseUrl =
-    OPENAI_COMPAT_BASE_URLS[config.provider ?? 'openai'] ?? OPENAI_COMPAT_BASE_URLS.openai!;
+  const isOwnModel = config.provider === 'own-model';
+  const baseUrl = getBaseUrlForProvider(config);
   const apiKey = getApiKeyForProvider(config);
+  const model = isOwnModel ? OWN_MODEL_NAME : config.model;
+  const extraBody = isOwnModel ? OWN_MODEL_EXTRA_BODY : undefined;
   const tool = subAgent ? GENERATE_MDMA_TOOL_BRIEF : GENERATE_MDMA_TOOL_INLINE;
   let continueLoop = true;
 
@@ -471,12 +598,13 @@ async function runOpenAIAgentLoop(
 
     for await (const ev of streamOpenAIAgentMessages(
       apiKey,
-      config.model,
+      model,
       systemPrompt,
       history,
       [tool],
       signal,
       baseUrl,
+      extraBody,
     )) {
       if (ev.type === 'stream_error') {
         onError(ev.message);
@@ -486,7 +614,20 @@ async function runOpenAIAgentLoop(
 
       if (ev.type === 'block_start') {
         const displayId = nextId();
-        if (ev.blockType === 'text') {
+        if (ev.blockType === 'thinking') {
+          // Reasoning channel (delta.reasoning) → collapsible thinking block.
+          // Not added to the OpenAI history (only text + tool_calls are).
+          const apiBlock: ApiAssistantBlock = { type: 'thinking', thinking: '', signature: '' };
+          blockMeta.set(ev.index, { displayId, apiBlock });
+          setTurns((prev) =>
+            appendBlock(prev, assistantTurnId, {
+              id: displayId,
+              type: 'thinking',
+              content: '',
+              isStreaming: true,
+            } satisfies AgentBlock),
+          );
+        } else if (ev.blockType === 'text') {
           const apiBlock: ApiAssistantBlock = { type: 'text', text: '' };
           blockMeta.set(ev.index, { displayId, apiBlock });
           setTurns((prev) =>
@@ -532,6 +673,15 @@ async function runOpenAIAgentLoop(
         }
       }
 
+      if (ev.type === 'thinking_delta') {
+        const meta = blockMeta.get(ev.index);
+        if (meta?.apiBlock.type === 'thinking') {
+          meta.apiBlock.thinking += ev.thinking;
+          const snap = meta.apiBlock.thinking;
+          setTurns((prev) => patchBlock(prev, assistantTurnId, meta.displayId, { content: snap }));
+        }
+      }
+
       if (ev.type === 'input_json_delta') {
         const meta = blockMeta.get(ev.index);
         if (meta?.partialJson !== undefined) meta.partialJson += ev.partial_json;
@@ -579,6 +729,7 @@ async function runOpenAIAgentLoop(
             }
           }
 
+          document = await maybeFixDocument(config, document, signal);
           const parsed = await parseMarkdown(document).catch(() => null);
           setTurns((prev) =>
             patchBlock(prev, assistantTurnId, meta.displayId, {
@@ -749,23 +900,34 @@ export function useAgent(options: UseAgentOptions = {}) {
       ]);
 
       abortRef.current = new AbortController();
+      const provider = config.provider ?? 'anthropic';
+      // Our own model runs the whole turn itself (tool-calling enabled), so it
+      // emits the MDMA document inline via generate_mdma — no author sub-agent.
+      const useSubAgent = (options.useAuthorSubAgent ?? false) && provider !== 'own-model';
       const toolPrompt = getAgentToolPromptVariant(config.systemPromptId).prompt;
-      // In sub-agent mode the conversation agent never writes MDMA directly,
-      // so its system prompt omits the author prompt and the buildSystemPrompt
-      // reminder (both of which would tempt the agent to inline MDMA in chat).
-      const systemPrompt = options.useAuthorSubAgent
-        ? options.flowPrompt
+      // Our own model gets its own Gemma-aligned agentic prompt (no <thinking>
+      // leak — see prompt-pack mdma-agent/mobile-reality/mdma-il). Other
+      // providers: sub-agent mode uses just the tool prompt; inline mode layers
+      // the author prompt via buildSystemPrompt.
+      let systemPrompt: string;
+      if (provider === 'own-model') {
+        systemPrompt = options.flowPrompt
+          ? `${MDMA_IL_AGENT_SYSTEM_PROMPT}\n\n---\n\n${options.flowPrompt}`
+          : MDMA_IL_AGENT_SYSTEM_PROMPT;
+      } else if (useSubAgent) {
+        systemPrompt = options.flowPrompt
           ? `${toolPrompt}\n\n---\n\n${options.flowPrompt}`
-          : toolPrompt
-        : buildSystemPrompt({
-            authorPrompt: getAuthorPromptVariant(config.systemPromptId).prompt,
-            customPrompt: options.flowPrompt
-              ? `${toolPrompt}\n\n---\n\n${options.flowPrompt}`
-              : toolPrompt,
-          });
-
-      const subAgent = options.useAuthorSubAgent ? makeAuthorSubAgent(config) : null;
-      const provider = config.provider ?? 'anthropic';
+          : toolPrompt;
+      } else {
+        systemPrompt = buildSystemPrompt({
+          authorPrompt: getAuthorPromptVariant(config.systemPromptId).prompt,
+          customPrompt: options.flowPrompt
+            ? `${toolPrompt}\n\n---\n\n${options.flowPrompt}`
+            : toolPrompt,
+        });
+      }
+
+      const subAgent = useSubAgent ? makeAuthorSubAgent(config) : null;
 
       try {
         if (provider === 'anthropic') {
@@ -824,6 +986,15 @@ export function useAgent(options: UseAgentOptions = {}) {
     [runTurn],
   );
 
+  // Send a specific message as a visible user turn and resolve when the agent's
+  // response is fully complete. Used by the auto-play demo to pace the script.
+  const sendText = useCallback(
+    async (text: string) => {
+      await runTurn(text, false);
+    },
+    [runTurn],
+  );
+
   const stop = useCallback(() => {
     abortRef.current?.abort();
   }, []);
@@ -848,6 +1019,7 @@ export function useAgent(options: UseAgentOptions = {}) {
     updateConfig,
     send,
     sendHidden,
+    sendText,
     stop,
     clear,
     inputRef,
diff --git a/demo/src/chat/ChatInput.tsx b/demo/src/chat/ChatInput.tsx
index 359326a..9276f43 100644
--- a/demo/src/chat/ChatInput.tsx
+++ b/demo/src/chat/ChatInput.tsx
@@ -13,6 +13,14 @@ export interface ChatInputProps {
   disabled?: boolean;
   /** Placeholder text override. */
   placeholder?: string;
+  /** Copy the whole raw conversation to the clipboard (debugging). */
+  onCopyRaw?: () => void;
+  /** Briefly true right after a successful copy, for button feedback. */
+  copiedRaw?: boolean;
+  /** Start/stop the scripted auto-play demo. */
+  onPlayDemo?: () => void;
+  /** True while the auto-play demo is running. */
+  isPlaying?: boolean;
 }
 
 export const ChatInput = memo(function ChatInput({
@@ -26,8 +34,12 @@ export const ChatInput = memo(function ChatInput({
   inputRef,
   disabled,
   placeholder,
+  onCopyRaw,
+  copiedRaw,
+  onPlayDemo,
+  isPlaying,
 }: ChatInputProps) {
-  const isDisabled = disabled && !isGenerating;
+  const isDisabled = (disabled && !isGenerating) || Boolean(isPlaying);
 
   return (
     <div className="chat-input-bar">
@@ -48,6 +60,27 @@ export const ChatInput = memo(function ChatInput({
           }}
         />
         <div className="chat-input-actions">
+          {onPlayDemo && (
+            <button
+              type="button"
+              className="chat-demo-btn"
+              data-active={isPlaying ? 'true' : undefined}
+              onClick={onPlayDemo}
+              title="Auto-play a scripted conversation (for demo recordings)"
+            >
+              {isPlaying ? '■ Stop demo' : '▶ Play demo'}
+            </button>
+          )}
+          {hasMessages && onCopyRaw && (
+            <button
+              type="button"
+              className="chat-clear-btn"
+              onClick={onCopyRaw}
+              title="Copy the whole raw conversation (text + generate_mdma documents) for debugging"
+            >
+              {copiedRaw ? 'Copied!' : 'Copy raw'}
+            </button>
+          )}
           {hasMessages && (
             <button
               type="button"
diff --git a/demo/src/preview/insurance-flow-prompt.ts b/demo/src/preview/insurance-flow-prompt.ts
index c49d64a..ead1e80 100644
--- a/demo/src/preview/insurance-flow-prompt.ts
+++ b/demo/src/preview/insurance-flow-prompt.ts
@@ -31,4 +31,10 @@ Rules:
 - The very first assistant turn is the exception — emit step 1 immediately on the first user message, no \`[system]\` message required.
 - If the user chats between steps ("is this it?", "what about my address?", "ok thanks", etc.), they are still on the current step. Answer in plain conversation only and **wait** for the \`[system]\` advance message before calling the tool again.
 - Use the **exact** ids and \`onSubmit\` action labels listed above. Don't regenerate previously-shown components. Don't add extras (no buttons, webhooks, callouts beyond what each step requires).
+
+### One step per turn — no look-ahead
+Each turn renders exactly **one** step, and your visible text **and** the component you emit must both be about the **same** step:
+- Figure out which step you are on: the step named in the most recent \`[system]\` message (or step 1 on the very first user message). Emit that step's component using its exact id, \`onSubmit\`, and field list from the spec above — nothing from an earlier or later step.
+- Your visible sentence introduces **only the current step's** form. Never mention, ask for, or preview a later step's data. For example, while rendering the \`claim-description-form\` (step 2), do **not** mention bank details, IBAN, or "next we'll need…" — the IBAN belongs to step 3 and is introduced only when you emit the \`bank-account-form\`.
+- The form you render and the sentence you write must match. If your text talks about the IBAN, the form you emit must be the \`bank-account-form\` — never a mismatch where the prose is one step ahead of (or behind) the rendered form.
 `;
diff --git a/demo/src/preview/use-preview-autoplay.ts b/demo/src/preview/use-preview-autoplay.ts
new file mode 100644
index 0000000..1108ced
--- /dev/null
+++ b/demo/src/preview/use-preview-autoplay.ts
@@ -0,0 +1,187 @@
+import type { FormComponent, FormField, MdmaComponent } from '@mobile-reality/mdma-spec';
+import { useCallback, useRef, useState } from 'react';
+import type { PreviewState } from './use-preview-validation.js';
+
+/**
+ * Auto-play for the insurance-claim preview — the form-driven analogue of the
+ * scripted `DEMO_SCRIPT` in AgentChatView.
+ *
+ * Where the agent chat demo only types and sends text, this flow is interactive:
+ * the agent renders a form, the user fills + submits it, and `useInsuranceFlow`
+ * advances to the next step. So auto-play here (1) sends a kickoff message, then
+ * (2) for each rendered step form, fills its fields with scripted answers and
+ * dispatches the submit `ACTION_TRIGGERED` on the live store — exactly what a
+ * human clicking "Submit" does. `useInsuranceFlow` (subscribed to the same
+ * store) then calls the backend and advances the agent to the next step.
+ *
+ * This lets you run the same custom claim flow hands-free against the own-model
+ * (Agent Settings) and compare it to frontier models.
+ */
+const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));
+
+const KICKOFF = "Hi — I'd like to file a new insurance claim.";
+
+// The flow's three form steps, in order (step 4 is a terminal callout — no form).
+const FLOW_ACTIONS = ['collect-personal-info', 'collect-claim', 'collect-bank'] as const;
+
+// Scripted answers keyed by field name; falls back to a value by field type.
+const ANSWERS: Record<string, string> = {
+  'full-name': 'Jamie Rivera',
+  birthday: '1988-03-22',
+  'claim-description':
+    'A pipe under the kitchen sink burst overnight and flooded the floor, damaging the lower cabinets and the flooring.',
+  iban: 'DE89370400440532013000',
+};
+
+function answerForField(field: FormField): string | boolean {
+  // Own-property check: `in` would also match inherited keys (e.g. "toString").
+  const scripted = Object.prototype.hasOwnProperty.call(ANSWERS, field.name) ? ANSWERS[field.name] : undefined;
+  if (scripted !== undefined) return scripted;
+  switch (field.type) {
+    case 'checkbox':
+      return true;
+    case 'number':
+      return '42';
+    case 'email':
+      return 'jamie.rivera@example.com';
+    case 'date':
+      return '1990-01-01';
+    case 'textarea':
+      return 'Additional details provided for the demo run.';
+    case 'select': {
+      // Use the first option: either a plain string or an object carrying `value`.
+      const first = Array.isArray(field.options) ? field.options[0] : undefined;
+      if (first === undefined) return '';
+      return typeof first === 'string' ? first : first.value;
+    }
+    case 'file':
+      return '';
+    default:
+      return 'Sample value';
+  }
+}
+
+/** First `form` component among the AST's top-level `mdmaBlock` nodes, or null. */
+function findForm(ast: PreviewState['ast']): FormComponent | null {
+  for (const child of ast ? ast.children : []) {
+    const node = child as { type?: string; component?: MdmaComponent };
+    const isFormBlock = node.type === 'mdmaBlock' && node.component?.type === 'form';
+    if (isFormBlock) return node.component as FormComponent;
+  }
+  return null;
+}
+
+interface UsePreviewAutoplayOptions {
+  previewState: PreviewState;
+  isGenerating: boolean;
+  sendText: (message: string) => Promise<void>;
+  setInput: (value: string) => void;
+  /** Resets the chat, flow state and backend log before a fresh run. */
+  reset: () => void;
+}
+
+/**
+ * Drives the scripted insurance-claim run: sends the kickoff message, then fills
+ * and submits each step form on the live store. `play` toggles start/stop.
+ */
+export function usePreviewAutoplay({
+  previewState,
+  isGenerating,
+  sendText,
+  setInput,
+  reset,
+}: UsePreviewAutoplayOptions): { isPlaying: boolean; play: () => void } {
+  const [isPlaying, setIsPlaying] = useState(false);
+  const playingRef = useRef(false);
+
+  // Keep the latest reactive values readable from inside the async play loop.
+  const stateRef = useRef(previewState);
+  stateRef.current = previewState;
+  const generatingRef = useRef(isGenerating);
+  generatingRef.current = isGenerating;
+
+  // Poll `predicate` until it returns a truthy value, play is stopped, or timeout.
+  const waitFor = useCallback(
+    async <T>(predicate: () => T | null, timeoutMs = 60_000): Promise<T | null> => {
+      const start = Date.now();
+      while (Date.now() - start < timeoutMs) {
+        if (!playingRef.current) return null;
+        const result = predicate();
+        if (result) return result;
+        await sleep(150);
+      }
+      return null;
+    },
+    [],
+  );
+
+  const play = useCallback(() => {
+    // Toggle: a second press stops the run.
+    if (playingRef.current) {
+      playingRef.current = false;
+      setIsPlaying(false);
+      return;
+    }
+    playingRef.current = true;
+    setIsPlaying(true);
+
+    const runScript = async (): Promise<void> => {
+      reset();
+      await sleep(500);
+
+      // Typewriter the kickoff message into the input, then send it.
+      for (let k = 1; k <= KICKOFF.length; k++) {
+        if (!playingRef.current) break;
+        setInput(KICKOFF.slice(0, k));
+        await sleep(22);
+      }
+      await sleep(250);
+      setInput('');
+      if (playingRef.current) await sendText(KICKOFF);
+
+      let lastBlockId: string | null = null;
+      for (const action of FLOW_ACTIONS) {
+        if (!playingRef.current) break;
+
+        // Wait for a freshly-rendered, validated form for this step.
+        const found = await waitFor(() => {
+          if (generatingRef.current) return null;
+          const s = stateRef.current;
+          if (s.status !== 'ready' || !s.ast || !s.store || s.blockId === lastBlockId) return null;
+          const form = findForm(s.ast);
+          if (!form || form.onSubmit !== action) return null;
+          return { form, store: s.store, blockId: s.blockId };
+        });
+        if (!found) {
+          console.warn(`[preview-autoplay] timed out waiting for "${action}" form`);
+          break;
+        }
+        const { form, store, blockId } = found;
+
+        // Fill each field, then submit — same dispatches a human click produces.
+        await sleep(450);
+        for (const field of form.fields) {
+          if (!playingRef.current) break;
+          const value = answerForField(field);
+          store.dispatch({ type: 'FIELD_CHANGED', componentId: form.id, field: field.name, value });
+          await sleep(130);
+        }
+        await sleep(300);
+        if (!playingRef.current) break;
+        store.dispatch({ type: 'ACTION_TRIGGERED', componentId: form.id, actionId: form.onSubmit });
+        lastBlockId = blockId;
+        // Let useInsuranceFlow call the backend + advance the agent to the next step.
+        await sleep(1000);
+      }
+    };
+    // Always leave play mode, even if a step throws (the failure is logged, not lost).
+    void runScript()
+      .catch((err: unknown) => console.warn('[preview-autoplay] run failed', err))
+      .finally(() => {
+        playingRef.current = false;
+        setIsPlaying(false);
+      });
+  }, [reset, sendText, setInput, waitFor]);
+
+  return { isPlaying, play };
+}
diff --git a/demo/src/styles.css b/demo/src/styles.css
index 872e92b..1acb0b4 100644
--- a/demo/src/styles.css
+++ b/demo/src/styles.css
@@ -1628,6 +1628,49 @@ body {
   cursor: pointer;
 }
 
+/* Toggle row (e.g. "Generate with Own model") — label left, switch right. */
+.ai-setting--toggle {
+  flex-direction: row;
+  align-items: center;
+  justify-content: space-between;
+  gap: 10px;
+}
+
+.ai-setting--toggle input[type='checkbox'] {
+  appearance: none;
+  -webkit-appearance: none;
+  flex: 0 0 auto;
+  width: 38px;
+  height: 22px;
+  padding: 0;
+  border: none;
+  border-radius: 999px;
+  background: #d1d5db;
+  position: relative;
+  cursor: pointer;
+  transition: background 0.15s;
+}
+
+.ai-setting--toggle input[type='checkbox']::after {
+  content: '';
+  position: absolute;
+  top: 2px;
+  left: 2px;
+  width: 18px;
+  height: 18px;
+  border-radius: 50%;
+  background: #fff;
+  transition: transform 0.15s;
+}
+
+.ai-setting--toggle input[type='checkbox']:checked {
+  background: #6c5ce7;
+}
+
+.ai-setting--toggle input[type='checkbox']:checked::after {
+  transform: translateX(16px);
+}
+
 .ai-setting-model-group {
   display: flex;
   gap: 6px;
@@ -1780,6 +1823,48 @@ body {
   line-height: 1.5;
 }
 
+/* Per-message "Raw" debug toggle + raw output view */
+.agent-raw-toggle {
+  font-size: 10px;
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.5px;
+  color: #6c5ce7;
+  background: transparent;
+  border: 1px solid #d6d2f5;
+  border-radius: 5px;
+  padding: 2px 7px;
+  cursor: pointer;
+  transition:
+    background 0.15s,
+    color 0.15s;
+}
+
+.agent-raw-toggle:hover {
+  background: #efecfd;
+}
+
+.agent-raw-toggle[data-active="true"] {
+  background: #6c5ce7;
+  color: #fff;
+  border-color: #6c5ce7;
+}
+
+.agent-raw {
+  font-size: 12px;
+  font-family: "SF Mono", "Fira Code", "Cascadia Code", monospace;
+  white-space: pre-wrap;
+  word-break: break-word;
+  color: #e0e0e0;
+  background: #1e1e2e;
+  margin: 0;
+  padding: 14px;
+  border-radius: 8px;
+  line-height: 1.55;
+  overflow: auto;
+  max-height: 520px;
+}
+
 .chat-msg-source {
   font-size: 12.5px;
   font-family: "SF Mono", "Fira Code", "Cascadia Code", monospace;
@@ -1992,6 +2077,31 @@ body {
   background: #fef2f2;
 }
 
+.chat-demo-btn {
+  padding: 6px 12px;
+  font-size: 12px;
+  font-weight: 600;
+  border: 1px solid #d6d2f5;
+  border-radius: 8px;
+  background: transparent;
+  color: #6c5ce7;
+  cursor: pointer;
+  transition: all 0.15s;
+  white-space: nowrap;
+  margin-right: auto;
+}
+
+.chat-demo-btn:hover {
+  background: #efecfd;
+  border-color: #6c5ce7;
+}
+
+.chat-demo-btn[data-active="true"] {
+  background: #6c5ce7;
+  color: #fff;
+  border-color: #6c5ce7;
+}
+
 .chat-send-btn {
   padding: 7px 18px;
   font-size: 13.5px;
diff --git a/evals/.env.example b/evals/.env.example
index 4d3940c..5cd19c5 100644
--- a/evals/.env.example
+++ b/evals/.env.example
@@ -38,9 +38,26 @@ OPENROUTER_API_KEY=
 #EVAL_PROVIDER=openrouter:google/gemini-2.5-pro
 #EVAL_PROVIDER=openrouter:google/gemini-2.5-flash
 #EVAL_PROVIDER=openrouter:google/gemini-2.5-flash-lite
+# Gemma (open weights) — all use the google/gemma prompt variant
+#EVAL_PROVIDER=openrouter:google/gemma-4-26b-a4b-it
+#EVAL_PROVIDER=openrouter:google/gemma-4-31b-it
+#EVAL_PROVIDER=openrouter:google/gemma-3n-e4b-it
 
 # --- xAI (via OpenRouter) ---
 #EVAL_PROVIDER=openrouter:x-ai/grok-4.20
 #EVAL_PROVIDER=openrouter:x-ai/grok-4.3
 
 EVAL_PROVIDER=openai:gpt-5.5
+
+# --- Our own model (evals/own-model/) ---
+# Self-hosted MDMA-IL model (Gemma-4-E4B + v3 LoRA). The own-model suite reads
+# these dedicated vars instead of EVAL_PROVIDER, so it can run independently of
+# the third-party model evals.
+#   OWN_MODEL_PROVIDER  promptfoo provider id (OpenAI-compatible chat).
+#   OWN_MODEL_BASE_URL  the model's OpenAI-compatible base URL (ends in /v1).
+#   OWN_MODEL_API_KEY   the API key / token for the endpoint.
+# If the endpoint uses proxy-auth headers rather than an API key, add a `headers:`
+# block to the provider config in own-model/promptfooconfig.* instead of setting OWN_MODEL_API_KEY.
+OWN_MODEL_PROVIDER=openai:chat:mdma-il-v3
+OWN_MODEL_BASE_URL=
+OWN_MODEL_API_KEY=
diff --git a/evals/.gitignore b/evals/.gitignore
new file mode 100644
index 0000000..f1814c1
--- /dev/null
+++ b/evals/.gitignore
@@ -0,0 +1,3 @@
+
+# gemma eval suite + dataset generator — kept local for dataset generation, not published
+gemma/
diff --git a/evals/own-model/README.md b/evals/own-model/README.md
new file mode 100644
index 0000000..40bb77c
--- /dev/null
+++ b/evals/own-model/README.md
@@ -0,0 +1,70 @@
+# Own-model eval — MDMA-IL DSL holdout gate
+
+Self-contained eval for **our own hosted model** — `google/gemma-4-E4B-it` + the
+**v3 MDMA-IL LoRA**.
+
+## What this tests
+
+Our model is **not** an NL chat model — it was fine-tuned to take **one MDMA-IL
+DSL intent** as input and return an **MDMA document**. So this suite is a
+**DSL holdout gate**, not the NL author suites the third-party models run:
+
+- **Input:** the 95 held-out scenarios in **DSL** form
+  (`../gemma/dataset/data/holdout-dsl.jsonl`, via `tests-dsl.mjs`).
+- **System prompt:** the `mobile-reality/mdma-il` author prompt from the prompt
+  pack — DSL input grammar + authoring rules + worked form/table/chart examples.
+- **Assertion:** `validate-mdma` — every output must be a valid MDMA document.
+
+## Why a DSL-aware prompt (not a bare instruction)
+
+The system prompt **must describe the MDMA-IL DSL** the model reads — a bare
+"generate MDMA" instruction is out-of-distribution, since the model's whole job
+is to interpret a DSL intent. Empirically measured against this endpoint:
+
+1. **The DSL grammar is required.** Without the grammar section the model
+   misreads the intent and drops `type:`/`id:`, nests under a `form:` key, or
+   hallucinates `type: action`.
+2. **A worked example anchors the output shape.** On the DSL holdout, validity
+   moved from ~41% (bare instruction) to ~90.5% once the prompt carried the DSL
+   grammar plus a worked example.
+
+The small E4B model has only a 2048-token context (`max_model_len`), so the
+prompt stays lean while still teaching the DSL — grammar + a few examples, not a
+full spec dump.
+
+## Observations (not conclusions)
+
+This is a **small model** (Gemma 4 E4B + LoRA) — prompt-sensitive, with a
+2048-token context. In short, on the DSL holdout, output validity against
+the **current** validator moved with the system prompt: ~41% (bare prompt) →
+~90.5% (DSL-aware prompt with a worked example). It is **not 100%**, and we have
+**not** concluded whether the residual gap calls for a retrain, output
+normalization, or more prompt work — that's an open question.
+
+## Configure & run
+
+Set in `../.env` (dedicated vars, not `EVAL_PROVIDER`):
+
+```
+OWN_MODEL_PROVIDER=openai:chat:mdma-v3                              # served LoRA id
+OWN_MODEL_BASE_URL=https://…modal.run/v1                            # OpenAI-compatible base URL
+OWN_MODEL_API_KEY=EMPTY                                             # placeholder while auth is off
+```
+
+```bash
+pnpm --filter @mobile-reality/mdma-evals eval:own-model   # run the gate
+pnpm --filter @mobile-reality/mdma-evals eval:view        # view results
+```
+
+If `holdout-dsl.jsonl` is missing (it's gitignored/generated), build it first
+with `pnpm --filter @mobile-reality/mdma-evals dataset:build`, or point
+`OWN_MODEL_HOLDOUT` at your copy.
+
+## Contents
+
+- `promptfooconfig.own-model.yaml` — the gate config.
+- `prompt.mjs` — pins the `mobile-reality/mdma-il` system prompt; passes the
+  DSL as the user message.
+- `tests-dsl.mjs` — loads the DSL holdout into promptfoo test cases.
+- `assertions/` — own copy of the assertion modules (self-contained).
+- `results.json` — output of the last run (committed, reusable downstream).
diff --git a/evals/own-model/assertions/bar-chart.mjs b/evals/own-model/assertions/bar-chart.mjs
new file mode 100644
index 0000000..a37fa16
--- /dev/null
+++ b/evals/own-model/assertions/bar-chart.mjs
@@ -0,0 +1,13 @@
+/**
+ * Asserts that the output contains a bar chart variant.
+ */
+export default function (output) {
+  if (
+    output.includes('variant: bar') ||
+    output.includes("variant: 'bar'") ||
+    output.includes('"bar"')
+  ) {
+    return { pass: true, score: 1, reason: 'Bar chart variant found' };
+  }
+  return { pass: false, score: 0, reason: 'Expected variant: bar in chart component' };
+}
diff --git a/evals/own-model/assertions/callout-variant.mjs b/evals/own-model/assertions/callout-variant.mjs
new file mode 100644
index 0000000..4907119
--- /dev/null
+++ b/evals/own-model/assertions/callout-variant.mjs
@@ -0,0 +1,21 @@
+/**
+ * Asserts that the output contains a callout with the expected variant.
+ * Pass the variant name via config.variant (e.g. config: { variant: warning }).
+ */
+export default function (output, { config }) {
+  const variant = config?.variant || 'warning';
+  const hasCallout = output.includes('type: callout');
+  const hasVariant =
+    output.includes(`variant: ${variant}`) ||
+    output.includes(`variant: '${variant}'`) ||
+    output.includes(`variant: "${variant}"`);
+
+  if (hasCallout && hasVariant) {
+    return { pass: true, score: 1, reason: `Callout with variant: ${variant} found` };
+  }
+  return {
+    pass: false,
+    score: hasCallout ? 0.5 : 0,
+    reason: `Expected callout with variant: ${variant}. ${!hasCallout ? 'No callout found' : 'Wrong variant'}`,
+  };
+}
diff --git a/evals/own-model/assertions/calls-generate-mdma.mjs b/evals/own-model/assertions/calls-generate-mdma.mjs
new file mode 100644
index 0000000..e09e707
--- /dev/null
+++ b/evals/own-model/assertions/calls-generate-mdma.mjs
@@ -0,0 +1,58 @@
+/**
+ * Asserts that the model called the `generate_mdma` tool.
+ *
+ * Checks the output and raw response in all known locations promptfoo may
+ * place tool-call data, so this works regardless of the provider or how
+ * promptfoo serialises the tool call response.
+ *
+ * Optional config:
+ *   - shouldCall: boolean (default true) — set to false to assert that the
+ *     model did NOT call the tool (e.g. for conversational / info requests).
+ */
+export default function (output, context) {
+  try {
+    const shouldCall = context?.config?.shouldCall ?? true;
+
+    const parts = [
+      output,
+      context?.response,
+      context?.response?.output,
+      context?.response?.raw,
+    ].map((v) => {
+      if (v == null) return '';
+      if (typeof v === 'string') return v;
+      try {
+        return JSON.stringify(v);
+      } catch {
+        return '';
+      }
+    });
+
+    const combined = parts.join('\n');
+    const called = combined.includes('generate_mdma');
+
+    if (shouldCall) {
+      return {
+        pass: called,
+        score: called ? 1 : 0,
+        reason: called
+          ? 'Model correctly called generate_mdma tool'
+          : 'Model did not call generate_mdma — check tool definition and system prompt tool-use instruction',
+      };
+    }
+
+    return {
+      pass: !called,
+      score: !called ? 1 : 0,
+      reason: !called
+        ? 'Model correctly did not call generate_mdma for a non-document request'
+        : 'Model should not have called generate_mdma for this request',
+    };
+  } catch (err) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Assertion error: ${err instanceof Error ? err.message : String(err)}`,
+    };
+  }
+}
diff --git a/evals/own-model/assertions/component-count.mjs b/evals/own-model/assertions/component-count.mjs
new file mode 100644
index 0000000..213f2e7
--- /dev/null
+++ b/evals/own-model/assertions/component-count.mjs
@@ -0,0 +1,17 @@
+/**
+ * Asserts that the output contains at least N mdma components.
+ * Uses config.min as the minimum count (default: 5).
+ */
+export default function (output, { config }) {
+  const min = config?.min || 5;
+  const blocks = [...output.matchAll(/```mdma\n([\s\S]*?)```/g)];
+
+  if (blocks.length >= min) {
+    return { pass: true, score: 1, reason: `Found ${blocks.length} components (min: ${min})` };
+  }
+  return {
+    pass: false,
+    score: blocks.length / min,
+    reason: `Expected at least ${min} components, found ${blocks.length}`,
+  };
+}
diff --git a/evals/own-model/assertions/exact-field-count.mjs b/evals/own-model/assertions/exact-field-count.mjs
new file mode 100644
index 0000000..907d9f6
--- /dev/null
+++ b/evals/own-model/assertions/exact-field-count.mjs
@@ -0,0 +1,32 @@
+/**
+ * Asserts that a form contains exactly N fields (using `- name:` occurrences).
+ *
+ * Uses `config.expected` as the expected count.
+ * Tolerant: passes if count matches exactly.
+ */
+export default function (output, { config }) {
+  const expected = config.expected;
+  if (!expected) {
+    return { pass: false, score: 0, reason: 'No config.expected (field count) provided' };
+  }
+
+  // Count field definitions inside mdma blocks
+  const blocks = [...output.matchAll(/```mdma\n([\s\S]*?)```/g)];
+  const formBlocks = blocks.filter((b) => b[1].includes('type: form'));
+
+  let totalFields = 0;
+  for (const block of formBlocks) {
+    const fieldNames = block[1].match(/- name:/g) || [];
+    totalFields += fieldNames.length;
+  }
+
+  if (totalFields === expected) {
+    return { pass: true, score: 1, reason: `Exactly ${expected} form fields found` };
+  }
+
+  return {
+    pass: false,
+    score: totalFields > expected ? 0.5 : totalFields / expected,
+    reason: `Expected exactly ${expected} form fields, found ${totalFields}`,
+  };
+}
diff --git a/evals/own-model/assertions/fields-preserved.mjs b/evals/own-model/assertions/fields-preserved.mjs
new file mode 100644
index 0000000..f7f0fe6
--- /dev/null
+++ b/evals/own-model/assertions/fields-preserved.mjs
@@ -0,0 +1,33 @@
+/**
+ * Asserts that specific field names are still present in the output.
+ *
+ * Used to verify that after a user requests an adjustment (e.g. tone change),
+ * the original fields defined in the MDMA document are preserved.
+ *
+ * Expects `assertion.value` to be a comma-separated list of field names/keywords
+ * that must all be present in the output.
+ */
+export default function (output, { assertion }) {
+  const requiredFields = assertion.value
+    .split(',')
+    .map((f) => f.trim())
+    .filter(Boolean);
+
+  const missing = requiredFields.filter(
+    (field) => !output.toLowerCase().includes(field.toLowerCase()),
+  );
+
+  if (missing.length === 0) {
+    return {
+      pass: true,
+      score: 1,
+      reason: `All ${requiredFields.length} fields preserved: ${requiredFields.join(', ')}`,
+    };
+  }
+
+  return {
+    pass: false,
+    score: (requiredFields.length - missing.length) / requiredFields.length,
+    reason: `Missing fields after adjustment: ${missing.join(', ')}`,
+  };
+}
diff --git a/evals/own-model/assertions/file-field.mjs b/evals/own-model/assertions/file-field.mjs
new file mode 100644
index 0000000..a8d8520
--- /dev/null
+++ b/evals/own-model/assertions/file-field.mjs
@@ -0,0 +1,43 @@
+/**
+ * Asserts that the output contains a form with a file-typed field.
+ *
+ * Optional config:
+ *   - sensitive: boolean — asserts the file field sets `sensitive: true`
+ *
+ * Note: `accept` and `multiple` are renderer-level concerns and are NOT part
+ * of the MDMA spec, so they are not asserted here.
+ */
+export default function (output, { config } = {}) {
+  const blockRegex = /```mdma\n([\s\S]*?)```/g;
+  const blocks = [...output.matchAll(blockRegex)].map((m) => m[1]);
+
+  const formBlocks = blocks.filter((b) => /^type:\s*form/m.test(b));
+  if (formBlocks.length === 0) {
+    return { pass: false, score: 0, reason: 'No form block found in output' };
+  }
+
+  const fileBlock = formBlocks.find((b) => /type:\s*file\b/.test(b));
+  if (!fileBlock) {
+    return {
+      pass: false,
+      score: 0,
+      reason: 'No form field with `type: file` found',
+    };
+  }
+
+  const reasons = ['Form contains a file field'];
+
+  if (config?.sensitive === true) {
+    const sensitivePattern = /type:\s*file[\s\S]{0,200}sensitive:\s*true/;
+    if (!sensitivePattern.test(fileBlock)) {
+      return {
+        pass: false,
+        score: 0,
+        reason: 'File field expected sensitive: true but not found',
+      };
+    }
+    reasons.push('sensitive: true');
+  }
+
+  return { pass: true, score: 1, reason: reasons.join('; ') };
+}
diff --git a/evals/own-model/assertions/fixer-contains-component.mjs b/evals/own-model/assertions/fixer-contains-component.mjs
new file mode 100644
index 0000000..927b79f
--- /dev/null
+++ b/evals/own-model/assertions/fixer-contains-component.mjs
@@ -0,0 +1,139 @@
+import { parse } from 'yaml';
+
+/**
+ * Custom promptfoo assertion for fixer eval (all-or-nothing: score is 1 or 0).
+ *
+ * Finds a component in the fixed output and validates its fields against an
+ * expected MDMA block provided in config.
+ *
+ * config:
+ *   expected: string   — complete (or partial) MDMA block YAML to compare against.
+ *                        The `id` field in the expected block is used to locate the
+ *                        component in the output. Every field present in `expected`
+ *                        must match the actual component — extra fields in the
+ *                        actual output are ignored.
+ *   hasFields: string[] — extra top-level field names that must be present and non-empty.
+ *
+ * Example:
+ *   config:
+ *     expected: |
+ *       type: webhook
+ *       id: order-webhook
+ *       url: https://api.example.com/orders
+ *       method: POST
+ *       trigger: order-form
+ */
+export default function (output, { config } = {}) {
+  const { expected: expectedYaml, hasFields = [] } = config ?? {};
+
+  if (!expectedYaml) {
+    return { pass: false, score: 0, reason: 'No expected block provided in config' };
+  }
+
+  let expected;
+  try {
+    expected = parse(expectedYaml);
+  } catch (e) {
+    return { pass: false, score: 0, reason: `Could not parse expected block: ${e.message}` };
+  }
+
+  const id = expected?.id; // the id is the lookup key for the output block
+  if (!id) {
+    return { pass: false, score: 0, reason: 'Expected block has no id field' };
+  }
+
+  // Collect the raw YAML body of every ```mdma block in the output
+  const blocks = [];
+  const blockRegex = /```mdma\n([\s\S]*?)```/g;
+  let match;
+  while ((match = blockRegex.exec(output)) !== null) {
+    blocks.push(match[1]);
+  }
+
+  // Parse blocks in order and keep the first whose top-level id matches
+  let actual = null;
+  let actualRaw = null;
+  for (const raw of blocks) {
+    let parsed;
+    try {
+      parsed = parse(raw);
+    } catch {
+      continue; // a block that is not valid YAML is skipped, not reported
+    }
+    if (parsed?.id === id) {
+      actual = parsed;
+      actualRaw = raw.trim(); // kept only for the failure message
+      break;
+    }
+  }
+
+  if (!actual) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Component "${id}" not found in output (${blocks.length} block(s) present)`,
+    };
+  }
+
+  // Deep compare every field in expected against actual (extras in actual are ignored)
+  const failures = compareFields(expected, actual, '');
+
+  // hasFields: presence only — undefined, null and '' all count as missing
+  for (const field of hasFields) {
+    if (actual[field] === undefined || actual[field] === null || actual[field] === '') {
+      failures.push(`field "${field}" is missing or empty`);
+    }
+  }
+
+  if (failures.length > 0) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Component "${id}" field mismatch:\n${failures.join('\n')}\n\nActual block:\n${actualRaw}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `Component "${id}" matches expected block`,
+  };
+}
+
+function compareFields(expected, actual, prefix) {
+  const failures = [];
+  for (const [key, expectedVal] of Object.entries(expected)) {
+    const path = prefix ? `${prefix}.${key}` : key;
+    const actualVal = actual?.[key];
+
+    if (expectedVal === null || expectedVal === undefined) {
+      // null in expected = presence check only
+      if (actualVal === undefined || actualVal === null || actualVal === '') {
+        failures.push(`"${path}" is missing or empty`);
+      }
+    } else if (Array.isArray(expectedVal)) {
+      if (!Array.isArray(actualVal)) {
+        failures.push(`"${path}" should be an array, got ${typeof actualVal}`);
+      } else if (expectedVal.length !== actualVal.length) {
+        failures.push(`"${path}" length: expected ${expectedVal.length}, got ${actualVal.length}`);
+      } else {
+        for (let i = 0; i < expectedVal.length; i++) {
+          if (typeof expectedVal[i] === 'object' && expectedVal[i] !== null) {
+            failures.push(...compareFields(expectedVal[i], actualVal[i] ?? {}, `${path}[${i}]`));
+          } else if (expectedVal[i] !== actualVal[i]) {
+            failures.push(
+              `"${path}[${i}]": expected ${JSON.stringify(expectedVal[i])}, got ${JSON.stringify(actualVal[i])}`,
+            );
+          }
+        }
+      }
+    } else if (typeof expectedVal === 'object') {
+      failures.push(...compareFields(expectedVal, actualVal ?? {}, path));
+    } else if (actualVal !== expectedVal) {
+      failures.push(
+        `"${path}": expected ${JSON.stringify(expectedVal)}, got ${JSON.stringify(actualVal)}`,
+      );
+    }
+  }
+  return failures;
+}
diff --git a/evals/own-model/assertions/fixer-no-prose.mjs b/evals/own-model/assertions/fixer-no-prose.mjs
new file mode 100644
index 0000000..0746d09
--- /dev/null
+++ b/evals/own-model/assertions/fixer-no-prose.mjs
@@ -0,0 +1,31 @@
+/**
+ * Custom promptfoo assertion for fixer eval.
+ *
+ * Enforces that the fixer output contains ONLY ```mdma blocks — no prose,
+ * headings, intro/outro text, or commentary outside the blocks. The fixer's
+ * job is to repair MDMA blocks, not to converse with the user.
+ *
+ * Allowed in the output: ```mdma blocks and whitespace between them.
+ * Disallowed: prose paragraphs, Markdown headings, lists, code fences other
+ * than `mdma`, or any text outside a ```mdma ... ``` pair.
+ */
+export default function (output) {
+  // Strip every ```mdma ... ``` block (greedy across newlines, non-greedy on content)
+  const stripped = output.replace(/```mdma\n[\s\S]*?```/g, '').trim();
+
+  if (stripped.length === 0) {
+    return {
+      pass: true,
+      score: 1,
+      reason: 'Fixer output contains only ```mdma blocks (no prose)',
+    };
+  }
+
+  // Truncate the offending content for the failure message
+  const preview = stripped.length > 200 ? `${stripped.slice(0, 200)}...` : stripped;
+  return {
+    pass: false,
+    score: 0,
+    reason: `Fixer output contains non-mdma content (${stripped.length} chars):\n${preview}`,
+  };
+}
diff --git a/evals/own-model/assertions/fixer-preserves-components.mjs b/evals/own-model/assertions/fixer-preserves-components.mjs
new file mode 100644
index 0000000..2b455d3
--- /dev/null
+++ b/evals/own-model/assertions/fixer-preserves-components.mjs
@@ -0,0 +1,33 @@
+/**
+ * Custom promptfoo assertion for fixer eval.
+ *
+ * Verifies that the fixer didn't drop components. The fixed output
+ * should contain at least config.min mdma blocks (default: same as input).
+ */
+export default function (output, { config } = {}) {
+  const min = config?.min ?? 1;
+  const max = config?.max ?? Number.POSITIVE_INFINITY;
+  const blockCount = (output.match(/```mdma/g) ?? []).length;
+
+  if (blockCount < min) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Fixer output has ${blockCount} mdma block(s) but expected at least ${min}`,
+    };
+  }
+
+  if (blockCount > max) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Fixer output has ${blockCount} mdma block(s) but expected at most ${max}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `Fixer preserved ${blockCount} mdma block(s) (min: ${min}${max !== Number.POSITIVE_INFINITY ? `, max: ${max}` : ''})`,
+  };
+}
diff --git a/evals/own-model/assertions/fixer-resolves-errors.mjs b/evals/own-model/assertions/fixer-resolves-errors.mjs
new file mode 100644
index 0000000..d675c19
--- /dev/null
+++ b/evals/own-model/assertions/fixer-resolves-errors.mjs
@@ -0,0 +1,63 @@
+import { validate } from '@mobile-reality/mdma-validator';
+
+/**
+ * Custom promptfoo assertion for fixer eval.
+ *
+ * Validates that the LLM-fixed output:
+ * 1. Contains at least one mdma block (didn't strip everything)
+ * 2. Has zero unfixed errors after validation
+ * 3. Reports remaining warnings/infos for transparency
+ *
+ * The config.maxWarnings option (default: Infinity) allows tests to assert
+ * that the fixer also resolved warnings.
+ */
+export default function (output, { config } = {}) {
+  const maxWarnings = config?.maxWarnings ?? Infinity;
+  const exclude = config?.exclude ?? ['thinking-block', 'flow-ordering'];
+
+  // Check the output actually contains mdma blocks
+  const blockCount = (output.match(/```mdma/g) ?? []).length;
+  if (blockCount === 0) {
+    return {
+      pass: false,
+      score: 0,
+      reason: 'Fixer output contains no ```mdma blocks — the LLM may have stripped the document',
+    };
+  }
+
+  const result = validate(output, {
+    exclude,
+    autoFix: false,
+  });
+
+  const unfixedErrors = result.issues.filter((i) => i.severity === 'error');
+  const unfixedWarnings = result.issues.filter((i) => i.severity === 'warning');
+
+  if (unfixedErrors.length > 0) {
+    const details = unfixedErrors
+      .map((i) => `[${i.ruleId}] ${i.componentId ?? '?'}: ${i.message}`)
+      .join('\n');
+    return {
+      pass: false,
+      score: 0,
+      reason: `Fixer output still has ${unfixedErrors.length} error(s):\n${details}`,
+    };
+  }
+
+  if (unfixedWarnings.length > maxWarnings) {
+    const details = unfixedWarnings
+      .map((i) => `[${i.ruleId}] ${i.componentId ?? '?'}: ${i.message}`)
+      .join('\n');
+    return {
+      pass: false,
+      score: 0.5,
+      reason: `Fixer output has ${unfixedWarnings.length} warning(s) (max ${maxWarnings}):\n${details}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `Fixer resolved all errors (${result.summary.warnings} warnings, ${result.summary.infos} info, ${blockCount} blocks)`,
+  };
+}
diff --git a/evals/own-model/assertions/form-fields-match.mjs b/evals/own-model/assertions/form-fields-match.mjs
new file mode 100644
index 0000000..311db89
--- /dev/null
+++ b/evals/own-model/assertions/form-fields-match.mjs
@@ -0,0 +1,101 @@
+/**
+ * Deep validation: checks that generated mdma form blocks contain the
+ * expected fields with correct attributes.
+ *
+ * config.expectedForms: Array of { fields: string[], sensitive?: string[] }
+ *   - fields: field names that must appear in the form block
+ *   - sensitive: field names that must be marked sensitive: true
+ *
+ * If multiple expectedForms are provided, they are matched in order to
+ * the mdma form blocks found in the output.
+ */
+export default function (output, { config }) {
+  const expectedForms = config?.expectedForms || [];
+  if (expectedForms.length === 0) {
+    return { pass: true, score: 1, reason: 'No expected forms to check' };
+  }
+
+  // Extract all mdma form blocks
+  const blockRegex = /```mdma\n([\s\S]*?)```/g;
+  const blocks = [...output.matchAll(blockRegex)];
+  const formBlocks = blocks.map((b) => b[1].trim()).filter((b) => /^type:\s*form/m.test(b));
+
+  if (formBlocks.length === 0) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Expected ${expectedForms.length} form block(s) but found none`,
+    };
+  }
+
+  const results = [];
+  let totalScore = 0;
+
+  for (let i = 0; i < expectedForms.length; i++) {
+    const expected = expectedForms[i];
+    const block = formBlocks[i];
+
+    if (!block) {
+      results.push(`Form ${i + 1}: missing (expected ${expected.fields.length} fields)`);
+      continue;
+    }
+
+    const blockLower = block.toLowerCase();
+
+    // Check field names
+    const fieldsFound = expected.fields.filter(
+      (f) =>
+        blockLower.includes(`name: ${f.toLowerCase()}`) ||
+        blockLower.includes(`name: "${f.toLowerCase()}"`),
+    );
+    const fieldScore = fieldsFound.length / expected.fields.length;
+
+    // Check onSubmit is present
+    const hasOnSubmit = /onSubmit:\s*\S+/i.test(block);
+    if (!hasOnSubmit) {
+      results.push(`Form ${i + 1}: missing onSubmit (no submit button)`);
+    }
+
+    // Check sensitive flags
+    let sensitiveScore = 1;
+    if (expected.sensitive && expected.sensitive.length > 0) {
+      // For each sensitive field, check that it has sensitive: true nearby
+      let sensitiveFound = 0;
+      for (const sf of expected.sensitive) {
+        // Find the field block and check for sensitive: true
+        const fieldPattern = new RegExp(`name:\\s*"?${sf}"?[\\s\\S]{0,200}sensitive:\\s*true`, 'i');
+        if (fieldPattern.test(block)) {
+          sensitiveFound++;
+        }
+      }
+      sensitiveScore = sensitiveFound / expected.sensitive.length;
+    }
+
+    const submitScore = hasOnSubmit ? 1 : 0;
+    const formScore = (fieldScore + sensitiveScore + submitScore) / 3;
+    totalScore += formScore;
+
+    const missingFields = expected.fields.filter((f) => !fieldsFound.includes(f));
+    if (missingFields.length > 0) {
+      results.push(
+        `Form ${i + 1}: missing fields [${missingFields.join(', ')}] (${fieldsFound.length}/${expected.fields.length} found)`,
+      );
+    }
+    if (sensitiveScore < 1 && expected.sensitive) {
+      results.push(
+        `Form ${i + 1}: some sensitive flags missing (score: ${sensitiveScore.toFixed(2)})`,
+      );
+    }
+    if (missingFields.length === 0 && sensitiveScore === 1) {
+      results.push(`Form ${i + 1}: all ${expected.fields.length} fields correct`);
+    }
+  }
+
+  const avgScore = totalScore / expectedForms.length;
+
+  return {
+    pass: avgScore >= 0.5,
+    score: avgScore,
+    reason: results.join('; '),
+  };
+}
diff --git a/evals/own-model/assertions/has-bindings.mjs b/evals/own-model/assertions/has-bindings.mjs
new file mode 100644
index 0000000..2921e5f
--- /dev/null
+++ b/evals/own-model/assertions/has-bindings.mjs
@@ -0,0 +1,16 @@
+/**
+ * Asserts that the output contains binding expressions ({{ }}).
+ */
+export default function (output) {
+  const bindingPattern = /\{\{[a-z][a-zA-Z0-9_-]*\.[a-zA-Z0-9_.-]+\}\}/g;
+  const matches = output.match(bindingPattern) || [];
+
+  if (matches.length > 0) {
+    return {
+      pass: true,
+      score: 1,
+      reason: `Found ${matches.length} binding(s): ${matches.slice(0, 3).join(', ')}`,
+    };
+  }
+  return { pass: false, score: 0, reason: 'No binding expressions ({{component.field}}) found' };
+}
diff --git a/evals/own-model/assertions/has-confirm.mjs b/evals/own-model/assertions/has-confirm.mjs
new file mode 100644
index 0000000..23ad212
--- /dev/null
+++ b/evals/own-model/assertions/has-confirm.mjs
@@ -0,0 +1,17 @@
+/**
+ * Asserts that the output contains a button with a confirm dialog.
+ */
+export default function (output) {
+  const hasButton = output.includes('type: button');
+  const hasConfirm = output.includes('confirm:');
+  const hasConfirmText = output.includes('confirmText:') || output.includes('message:');
+
+  if (hasButton && hasConfirm && hasConfirmText) {
+    return { pass: true, score: 1, reason: 'Button with confirmation dialog found' };
+  }
+  return {
+    pass: false,
+    score: hasButton ? 0.5 : 0,
+    reason: `Expected button with confirm dialog. ${!hasButton ? 'No button found' : 'Missing confirm config'}`,
+  };
+}
diff --git a/evals/own-model/assertions/has-required-fields.mjs b/evals/own-model/assertions/has-required-fields.mjs
new file mode 100644
index 0000000..e128ea6
--- /dev/null
+++ b/evals/own-model/assertions/has-required-fields.mjs
@@ -0,0 +1,17 @@
+/**
+ * Asserts that the output contains at least N fields with required: true.
+ * Uses config.min as the minimum count (default: 2).
+ */
+export default function (output, { config }) {
+  const minRequired = config?.min || 2;
+  const matches = output.match(/required:\s*true/g) || [];
+
+  if (matches.length >= minRequired) {
+    return { pass: true, score: 1, reason: `Found ${matches.length} required fields` };
+  }
+  return {
+    pass: false,
+    score: matches.length / minRequired,
+    reason: `Expected at least ${minRequired} required: true flags, found ${matches.length}`,
+  };
+}
diff --git a/evals/own-model/assertions/has-sensitive.mjs b/evals/own-model/assertions/has-sensitive.mjs
new file mode 100644
index 0000000..8a64a74
--- /dev/null
+++ b/evals/own-model/assertions/has-sensitive.mjs
@@ -0,0 +1,9 @@
+/**
+ * Asserts that the output contains at least one sensitive: true flag.
+ */
+export default function (output) {
+  if (output.includes('sensitive: true')) {
+    return { pass: true, score: 1, reason: 'Found sensitive: true flag' };
+  }
+  return { pass: false, score: 0, reason: 'Expected at least one sensitive: true flag' };
+}
diff --git a/evals/own-model/assertions/has-webhook.mjs b/evals/own-model/assertions/has-webhook.mjs
new file mode 100644
index 0000000..bcc1313
--- /dev/null
+++ b/evals/own-model/assertions/has-webhook.mjs
@@ -0,0 +1,18 @@
+/**
+ * Asserts that the output contains a webhook component with required fields.
+ */
+export default function (output) {
+  const hasWebhook = output.includes('type: webhook');
+  const hasUrl = output.includes('url:');
+  const hasTrigger = output.includes('trigger:');
+
+  if (hasWebhook && hasUrl && hasTrigger) {
+    return { pass: true, score: 1, reason: 'Webhook with url and trigger found' };
+  }
+
+  if (!hasWebhook) {
+    return { pass: false, score: 0, reason: 'No webhook component found' };
+  }
+  const missing = [!hasUrl && 'url', !hasTrigger && 'trigger'].filter(Boolean);
+  return { pass: false, score: 0.5, reason: `Webhook missing: ${missing.join(', ')}` };
+}
diff --git a/evals/own-model/assertions/judge-matches-expected.mjs b/evals/own-model/assertions/judge-matches-expected.mjs
new file mode 100644
index 0000000..f49d2c6
--- /dev/null
+++ b/evals/own-model/assertions/judge-matches-expected.mjs
@@ -0,0 +1,127 @@
+import { validateConversation } from '@mobile-reality/mdma-validator';
+
+/**
+ * Custom promptfoo assertion for the conversation-judge eval.
+ *
+ * Required:
+ *   - `vars.expectedJudgment` — 'valid' | 'invalid'
+ *
+ * Optional per-test config:
+ *   - `expectedRules: string[]` — when expectedJudgment is 'invalid',
+ *     rule names that MUST appear in the LLM judge's issues array.
+ *
+ * Optional cross-check (turned on when `vars.steps` is provided):
+ *   - Runs `validateConversation()` on the assistant messages with the
+ *     given step definition. Asserts the deterministic validator agrees
+ *     with both `vars.expectedJudgment` AND the LLM judge.
+ *
+ * Passes only when every check it ran agrees. Fails on the first
+ * disagreement and reports what was off (LLM, validator, or both).
+ */
+export default function (output, context) {
+  const vars = context?.vars ?? {};
+  const config = context?.config ?? {};
+  const expectedJudgment = vars.expectedJudgment;
+
+  if (expectedJudgment !== 'valid' && expectedJudgment !== 'invalid') {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Test missing or invalid vars.expectedJudgment (got: ${JSON.stringify(expectedJudgment)})`,
+    };
+  }
+
+  // --- Parse the LLM judge's JSON output (first fenced block if any, else the raw text) ---
+  const fencedMatch = output.match(/```(?:json)?\s*\n?(\{[\s\S]*?\})\s*\n?```/);
+  const candidate = fencedMatch ? fencedMatch[1] : output.trim();
+
+  let judgment;
+  try {
+    judgment = JSON.parse(candidate);
+  } catch (err) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Judge output is not valid JSON: ${err.message}\nOutput (first 300 chars): ${output.slice(0, 300)}`,
+    };
+  }
+  if (typeof judgment?.valid !== 'boolean' || !Array.isArray(judgment.issues)) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Judge JSON missing required fields (boolean "valid" and array "issues")`,
+    };
+  }
+
+  const expectedValid = expectedJudgment === 'valid';
+  const llmValid = judgment.valid;
+
+  // --- Check 1: LLM judge matches expectedJudgment ---
+  if (llmValid !== expectedValid) {
+    const issuesSummary = judgment.issues
+      .slice(0, 5) // at most five issues are quoted in the failure reason
+      .map((i) => `  [msg ${i.messageIndex}, ${i.rule}] ${i.issue}`)
+      .join('\n');
+    return {
+      pass: false,
+      score: 0,
+      reason: `LLM judge expected "${expectedJudgment}" but returned "${llmValid ? 'valid' : 'invalid'}".\nJudge's issues:\n${issuesSummary || '  (none)'}`,
+    };
+  }
+
+  // --- Check 2: required rules surfaced (only for invalid cases) ---
+  const expectedRules = Array.isArray(config.expectedRules) ? config.expectedRules : null;
+  if (expectedRules && !expectedValid) {
+    const seenRules = new Set(judgment.issues.map((i) => i.rule));
+    const missing = expectedRules.filter((r) => !seenRules.has(r));
+    if (missing.length > 0) {
+      return {
+        pass: false,
+        score: 0.5, // partial credit: verdict was right, rule list incomplete
+        reason: `LLM judge correctly marked invalid but missed expected rule violation(s): ${missing.join(', ')}.\nSeen rules: ${[...seenRules].join(', ') || '(none)'}`,
+      };
+    }
+  }
+
+  // --- Check 3: cross-check against validateConversation (deterministic) ---
+  // Activated when the test provides a non-empty `vars.steps` array. Runs the
+  // validator on the assistant turns of `vars.conversation` and requires its
+  // verdict to equal the expected one (the LLM already matched it in check 1).
+  let crossCheckSummary = '';
+  if (Array.isArray(vars.steps) && vars.steps.length > 0) {
+    const assistantMessages = (Array.isArray(vars.conversation) ? vars.conversation : [])
+      .filter((t) => t.role === 'assistant')
+      .map((t) => t.content ?? '');
+
+    const validatorResult = validateConversation(assistantMessages, {
+      steps: vars.steps,
+      exclude: ['thinking-block'],
+    });
+    const validatorOk = validatorResult.ok;
+
+    if (validatorOk !== expectedValid) {
+      const errs = validatorResult.issues
+        .filter((i) => i.severity === 'error')
+        .slice(0, 5)
+        .map((i) => `  [msg ${i.messageIndex}] ${i.message}`)
+        .join('\n');
+      return {
+        pass: false,
+        score: 0,
+        reason: `validateConversation disagrees with expected judgment.\nExpected: "${expectedJudgment}".\nDeterministic validator: "${validatorOk ? 'valid' : 'invalid'}".\nLLM judge: "${llmValid ? 'valid' : 'invalid'}".\nValidator errors:\n${errs || '  (none)'}`,
+      };
+    }
+
+    // Both agree with expected → cross-check passed
+    const errCount = validatorResult.issues.filter((i) => i.severity === 'error').length;
+    crossCheckSummary = ` | validator: ${validatorOk ? 'ok' : `${errCount} error(s)`}`;
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: expectedValid
+      ? `Judge correctly marked the conversation as valid${crossCheckSummary}`
+      : `Judge correctly marked the conversation as invalid (${judgment.issues.length} issue${judgment.issues.length === 1 ? '' : 's'})${crossCheckSummary}`,
+  };
+}
diff --git a/evals/own-model/assertions/mentions-fields.mjs b/evals/own-model/assertions/mentions-fields.mjs
new file mode 100644
index 0000000..b1b68b9
--- /dev/null
+++ b/evals/own-model/assertions/mentions-fields.mjs
@@ -0,0 +1,34 @@
+/**
+ * Asserts that the generated prompt mentions a minimum percentage of the
+ * configured field names.
+ *
+ * config.fields: string[]  — field names to look for
+ * config.minRatio: number  — minimum ratio of fields that must appear (default: 0.5)
+ */
+export default function (output, { config }) {
+  const fields = config?.fields || [];
+  const minRatio = config?.minRatio ?? 0.5;
+
+  if (fields.length === 0) {
+    return { pass: true, score: 1, reason: 'No fields to check' };
+  }
+
+  const lower = output.toLowerCase();
+  const found = fields.filter((f) => lower.includes(f.toLowerCase()));
+  const ratio = found.length / fields.length;
+
+  if (ratio >= minRatio) {
+    return {
+      pass: true,
+      score: ratio,
+      reason: `Found ${found.length}/${fields.length} field names (${(ratio * 100).toFixed(0)}%)`,
+    };
+  }
+
+  const missing = fields.filter((f) => !lower.includes(f.toLowerCase()));
+  return {
+    pass: false,
+    score: ratio,
+    reason: `Only found ${found.length}/${fields.length} field names (need ${(minRatio * 100).toFixed(0)}%). Missing: ${missing.join(', ')}`,
+  };
+}
diff --git a/evals/own-model/assertions/mentions-trigger.mjs b/evals/own-model/assertions/mentions-trigger.mjs
new file mode 100644
index 0000000..bca49ea
--- /dev/null
+++ b/evals/own-model/assertions/mentions-trigger.mjs
@@ -0,0 +1,123 @@
+/**
+ * Asserts that the generated customPrompt includes trigger/when-to-generate
+ * instructions matching the configured trigger mode.
+ *
+ * config.mode: 'keyword' | 'immediate' | 'contextual' | 'form-submit' | 'multi-step'
+ * config.keywords: string[] — for keyword mode, specific phrases to check
+ * config.contextHints: string[] — for contextual mode, hints to look for
+ * config.steps: { mode: string, keywords?: string[] }[] — for multi-step mode
+ *
+ * All matching is case-insensitive. Returns a promptfoo grading result
+ * `{ pass, score, reason }`; an unknown or missing mode passes trivially.
+ */
+
+// Shared by the standalone modes and the per-step checks of multi-step mode so
+// the same wording is accepted no matter where a trigger mode is configured.
+const IMMEDIATE_MARKERS = /immediate|first message|always|conversation start|right away/;
+const FORM_SUBMIT_MARKERS = /submit|after.*form|previous step|form.*complet|upon.*submis/;
+const CONTEXTUAL_MARKERS = /when.*user|after.*attempt|if.*express|condition|context/;
+const MULTI_STEP_MARKERS =
+  /step\s*[12345]|phase\s*[12345]|first.*then|after.*submit|next.*step|sequential|in order/i;
+
+/** Returns the phrases from `phrases` that occur in the lowercased text. */
+function phrasesIn(lower, phrases) {
+  return phrases.filter((p) => lower.includes(p.toLowerCase()));
+}
+
+/** Whether a single multi-step entry's trigger is described in the text. */
+function stepMatches(step, lower) {
+  switch (step.mode) {
+    case 'immediate':
+      return IMMEDIATE_MARKERS.test(lower);
+    case 'form-submit':
+      return FORM_SUBMIT_MARKERS.test(lower);
+    case 'keyword':
+    case 'contextual':
+      // Without keywords there is nothing specific to look for: count as matched.
+      return step.keywords ? phrasesIn(lower, step.keywords).length > 0 : true;
+    default:
+      return true; // no specific check, count as passed
+  }
+}
+
+export default function mentionsTrigger(output, { config }) {
+  const mode = config?.mode;
+  const lower = output.toLowerCase();
+
+  if (mode === 'keyword') {
+    const keywords = config?.keywords || [];
+    if (keywords.length === 0) {
+      return { pass: true, score: 1, reason: 'No keywords to check' };
+    }
+    const found = phrasesIn(lower, keywords);
+    if (found.length > 0) {
+      return {
+        pass: true,
+        score: found.length / keywords.length,
+        reason: `Found ${found.length}/${keywords.length} trigger keywords: ${found.join(', ')}`,
+      };
+    }
+    return {
+      pass: false,
+      score: 0,
+      reason: `None of the trigger keywords found: ${keywords.join(', ')}`,
+    };
+  }
+
+  if (mode === 'immediate') {
+    return IMMEDIATE_MARKERS.test(lower)
+      ? { pass: true, score: 1, reason: 'Found immediate trigger instruction' }
+      : { pass: false, score: 0, reason: 'Missing immediate trigger instruction' };
+  }
+
+  if (mode === 'contextual') {
+    const hints = config?.contextHints || [];
+    if (hints.length === 0) {
+      // No explicit hints configured: fall back to generic conditional wording.
+      return CONTEXTUAL_MARKERS.test(lower)
+        ? { pass: true, score: 1, reason: 'Found contextual trigger language' }
+        : { pass: false, score: 0, reason: 'Missing contextual trigger language' };
+    }
+    const found = phrasesIn(lower, hints);
+    if (found.length > 0) {
+      return {
+        pass: true,
+        score: found.length / hints.length,
+        reason: `Found ${found.length}/${hints.length} context hints`,
+      };
+    }
+    return {
+      pass: false,
+      score: 0,
+      reason: `None of the contextual hints found: ${hints.join(', ')}`,
+    };
+  }
+
+  if (mode === 'form-submit') {
+    return FORM_SUBMIT_MARKERS.test(lower)
+      ? { pass: true, score: 1, reason: 'Found form-submit trigger instruction' }
+      : { pass: false, score: 0, reason: 'Missing form-submit trigger instruction' };
+  }
+
+  if (mode === 'multi-step') {
+    // The prompt must describe a sequential flow before per-step checks apply.
+    if (!MULTI_STEP_MARKERS.test(output)) {
+      return { pass: false, score: 0, reason: 'Output does not describe a multi-step flow' };
+    }
+
+    const steps = config?.steps || [];
+    if (steps.length === 0) {
+      return { pass: true, score: 1, reason: 'Found multi-step flow language' };
+    }
+
+    const matched = steps.filter((step) => stepMatches(step, lower)).length;
+    const score = matched / steps.length;
+    return {
+      pass: score >= 0.5,
+      score,
+      reason: `Matched ${matched}/${steps.length} step triggers in multi-step flow`,
+    };
+  }
+
+  return { pass: true, score: 1, reason: 'No trigger mode specified' };
+}
diff --git a/evals/own-model/assertions/no-mdma-regeneration.mjs b/evals/own-model/assertions/no-mdma-regeneration.mjs
new file mode 100644
index 0000000..0157e24
--- /dev/null
+++ b/evals/own-model/assertions/no-mdma-regeneration.mjs
@@ -0,0 +1,33 @@
+/**
+ * Asserts that the follow-up response does NOT contain full MDMA code blocks.
+ *
+ * After the initial generation, follow-up turns (tone changes, clarifications,
+ * field tweaks) should produce conversational responses — not regenerate the
+ * entire MDMA document from scratch.
+ *
+ * @param {string} output - Raw model response for the follow-up turn.
+ * @returns {{ pass: boolean, score: number, reason: string }} Passes only when
+ *   every ```mdma block in the response (if any) is a thinking block.
+ */
+export default function noMdmaRegeneration(output) {
+  // Allow trailing spaces or a CR after the fence tag: a CRLF response or a
+  // "```mdma " opener must not let a regenerated block slip past undetected.
+  const mdmaBlocks = [...output.matchAll(/```mdma[^\S\n]*\n([\s\S]*?)```/g)];
+
+  // Thinking blocks are always required by the MDMA spec — don't count them as regeneration
+  const nonThinking = mdmaBlocks.filter((m) => !/^\s*type:\s*thinking\b/m.test(m[1]));
+
+  if (nonThinking.length === 0) {
+    return {
+      pass: true,
+      score: 1,
+      reason: 'No MDMA blocks regenerated — conversational reply only',
+    };
+  }
+
+  return {
+    pass: false,
+    score: 0,
+    reason: `Expected no MDMA blocks in follow-up, but found ${nonThinking.length} non-thinking block(s). The model regenerated the document instead of responding conversationally.`,
+  };
+}
diff --git a/evals/own-model/assertions/no-multi-step-flow.mjs b/evals/own-model/assertions/no-multi-step-flow.mjs
new file mode 100644
index 0000000..634f03c
--- /dev/null
+++ b/evals/own-model/assertions/no-multi-step-flow.mjs
@@ -0,0 +1,33 @@
+import { validate } from '@mobile-reality/mdma-validator';
+
+/**
+ * Promptfoo assertion for the fixer eval: the repaired document must be free
+ * of `flow-ordering` errors.
+ *
+ * Runs the MDMA validator (thinking-block rule excluded, auto-fix disabled)
+ * and fails if any issue with rule id `flow-ordering` and severity `error`
+ * is reported, listing each offending message in the reason.
+ */
+export default function (output) {
+  const { issues } = validate(output, {
+    exclude: ['thinking-block'],
+    autoFix: false,
+  });
+
+  const messages = [];
+  for (const issue of issues) {
+    if (issue.ruleId === 'flow-ordering' && issue.severity === 'error') {
+      messages.push(issue.message);
+    }
+  }
+
+  if (messages.length === 0) {
+    return { pass: true, score: 1, reason: 'No flow-ordering errors' };
+  }
+
+  return {
+    pass: false,
+    score: 0,
+    reason: `Fixer output still has ${messages.length} flow-ordering error(s):\n${messages.join('\n')}`,
+  };
+}
diff --git a/evals/own-model/assertions/no-placeholder-content.mjs b/evals/own-model/assertions/no-placeholder-content.mjs
new file mode 100644
index 0000000..28144b0
--- /dev/null
+++ b/evals/own-model/assertions/no-placeholder-content.mjs
@@ -0,0 +1,57 @@
+/**
+ * Promptfoo assertion: rendered content must not contain placeholder text
+ * (TODO, TBD, FIXME, lorem ipsum, or a line made only of dots).
+ *
+ * Non-thinking mdma blocks are scanned first, then the prose outside mdma
+ * blocks. Thinking blocks are skipped — they may mention placeholders as part
+ * of reasoning, and only rendered content matters here.
+ */
+const PLACEHOLDER_PATTERNS = [
+  /\bTODO\b/i,
+  /\bTBD\b/i,
+  /\bFIXME\b/i,
+  /\bLorem\s*ipsum\b/i,
+  /^\.{3,}$/m,
+];
+
+/** Returns the first placeholder text found in `text`, or null if there is none. */
+function firstPlaceholder(text) {
+  for (const pattern of PLACEHOLDER_PATTERNS) {
+    const hit = text.match(pattern);
+    if (hit) return hit[0];
+  }
+  return null;
+}
+
+export default function (output) {
+  // mdma blocks, in document order; thinking blocks are not rendered content.
+  for (const [, yaml] of output.matchAll(/```mdma\s*([\s\S]*?)```/g)) {
+    if (/^\s*type:\s*thinking\b/m.test(yaml)) continue;
+
+    const placeholder = firstPlaceholder(yaml);
+    if (placeholder !== null) {
+      return {
+        pass: false,
+        score: 0,
+        reason: `Placeholder content "${placeholder}" found in mdma block`,
+      };
+    }
+  }
+
+  // Everything outside mdma blocks is prose the user sees directly.
+  const prose = output.replace(/```mdma[\s\S]*?```/g, '');
+  const placeholder = firstPlaceholder(prose);
+  if (placeholder !== null) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Placeholder content "${placeholder}" found in visible text`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: 'No placeholder content found in visible output',
+  };
+}
diff --git a/evals/own-model/assertions/no-spec-repetition.mjs b/evals/own-model/assertions/no-spec-repetition.mjs
new file mode 100644
index 0000000..970737a
--- /dev/null
+++ b/evals/own-model/assertions/no-spec-repetition.mjs
@@ -0,0 +1,32 @@
+/**
+ * Promptfoo assertion: a generated customPrompt must not copy the MDMA spec.
+ *
+ * The customPrompt is meant to add domain-specific instructions on top of the
+ * spec, so verbatim spec fragments are treated as a failure. Detection is a
+ * case-sensitive substring search for a fixed set of marker strings.
+ */
+export default function (output) {
+  // [marker text, human-readable label used in the failure reason]
+  const markers = [
+    ['MDMA_AUTHOR_PROMPT', 'MDMA_AUTHOR_PROMPT reference'],
+    ['## Self-Check Checklist', 'Self-check checklist'],
+    ['Component Reference Table', 'Component reference table'],
+    ['MUST be inside a fenced code block tagged', 'Base authoring rule'],
+  ];
+
+  const repeated = markers
+    .filter(([text]) => output.includes(text))
+    .map(([, label]) => label);
+
+  return repeated.length > 0
+    ? {
+        pass: false,
+        score: 0,
+        reason: `CustomPrompt repeats MDMA spec content: ${repeated.join(', ')}`,
+      }
+    : {
+        pass: true,
+        score: 1,
+        reason: 'No MDMA spec content repeated',
+      };
+}
diff --git a/evals/own-model/assertions/no-yaml-leak.mjs b/evals/own-model/assertions/no-yaml-leak.mjs
new file mode 100644
index 0000000..6769d9d
--- /dev/null
+++ b/evals/own-model/assertions/no-yaml-leak.mjs
@@ -0,0 +1,39 @@
+/**
+ * Promptfoo assertion: MDMA YAML must stay inside fenced code blocks.
+ *
+ * Component internals (type:, id:, sensitive:, fields:, ...) belong in fenced
+ * ```mdma blocks. After removing every fenced block, any line of the remaining
+ * text that still looks like one of those YAML entries is reported as a leak.
+ */
+export default function (output) {
+  // Line-anchored YAML shapes that should never show up in user-visible prose.
+  const leakShapes = [
+    /^type:\s*(form|button|tasklist|table|chart|callout|approval-gate|webhook|thinking)\b/m,
+    /^id:\s*[a-z][a-z0-9-]+$/m,
+    /^sensitive:\s*(true|false)$/m,
+    /^fields:\s*$/m,
+    /^columns:\s*$/m,
+    /^onSubmit:\s*/m,
+    /^onAction:\s*/m,
+    /^requiredApprovers:\s*\d+$/m,
+    /^variant:\s*(primary|secondary|danger|ghost|info|warning|error|success|line|bar|area|pie)\b/m,
+  ];
+
+  // Drop all fenced code blocks (mdma or otherwise); what is left is visible text.
+  const prose = output.replace(/```[\s\S]*?```/g, '');
+
+  const leaked = leakShapes
+    .map((shape) => prose.match(shape))
+    .filter(Boolean)
+    .map((hit) => hit[0].trim());
+
+  if (leaked.length > 0) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Raw YAML leaked in visible text: ${leaked.join(', ')}`,
+    };
+  }
+
+  return { pass: true, score: 1, reason: 'No YAML leaked in visible text' };
+}
diff --git a/evals/own-model/assertions/only-components.mjs b/evals/own-model/assertions/only-components.mjs
new file mode 100644
index 0000000..80be80d
--- /dev/null
+++ b/evals/own-model/assertions/only-components.mjs
@@ -0,0 +1,56 @@
+/**
+ * Asserts that the output contains ONLY the allowed component types (plus thinking).
+ *
+ * Pass allowed types via `config.allowed` as an array of strings.
+ * e.g. config: { allowed: [form, button] }
+ *
+ * The thinking component is always implicitly allowed.
+ * Fails if any component type appears that is not in the allow-list, or if
+ * the output has no mdma blocks at all. Blocks without a top-level `type:`
+ * line are ignored.
+ *
+ * @param {string} output - Raw model response.
+ * @param {{ config?: { allowed?: string[] } }} [context] - Promptfoo assertion
+ *   context; a missing context or config means only `thinking` is allowed.
+ * @returns {{ pass: boolean, score: number, reason: string }}
+ */
+export default function onlyComponents(output, { config } = {}) {
+  // `config` is optional: the assertion may be attached without any config.
+  const allowed = new Set((config?.allowed ?? []).map((t) => t.trim()));
+  allowed.add('thinking'); // always permitted
+
+  // Tolerate trailing spaces / CR after the fence tag so CRLF output is seen.
+  const blocks = [...output.matchAll(/```mdma[^\S\n]*\n([\s\S]*?)```/g)];
+  if (blocks.length === 0) {
+    return { pass: false, score: 0, reason: 'No MDMA blocks found' };
+  }
+
+  const found = [];
+  const unexpected = [];
+
+  for (const block of blocks) {
+    const typeMatch = block[1].match(/^type:\s*(.+)$/m);
+    if (!typeMatch) continue;
+    // A quoted scalar (`type: "form"`) names the same component as a bare one.
+    const type = typeMatch[1].trim().replace(/^(['"])(.*)\1$/, '$2');
+    found.push(type);
+    if (!allowed.has(type)) {
+      unexpected.push(type);
+    }
+  }
+
+  if (unexpected.length === 0) {
+    const nonThinking = found.filter((t) => t !== 'thinking');
+    return {
+      pass: true,
+      score: 1,
+      reason: `Only allowed components generated: ${nonThinking.join(', ')}`,
+    };
+  }
+
+  return {
+    pass: false,
+    score: 0,
+    reason: `Unexpected component(s): ${unexpected.join(', ')}. Allowed: ${[...allowed].join(', ')}. All found: ${found.join(', ')}`,
+  };
+}
diff --git a/evals/own-model/assertions/pie-chart.mjs b/evals/own-model/assertions/pie-chart.mjs
new file mode 100644
index 0000000..fe63aa2
--- /dev/null
+++ b/evals/own-model/assertions/pie-chart.mjs
@@ -0,0 +1,12 @@
+/**
+ * Promptfoo assertion: the output must declare a pie chart variant, written
+ * bare, single-quoted, or double-quoted.
+ */
+export default function (output) {
+  const spellings = ['variant: pie', "variant: 'pie'", 'variant: "pie"'];
+  const hasPie = spellings.some((spelling) => output.includes(spelling));
+
+  return hasPie
+    ? { pass: true, score: 1, reason: 'Pie chart variant found' }
+    : { pass: false, score: 0, reason: 'Expected variant: pie in chart component' };
+}
diff --git a/evals/own-model/assertions/pii-sensitive.mjs b/evals/own-model/assertions/pii-sensitive.mjs
new file mode 100644
index 0000000..a4ae88e
--- /dev/null
+++ b/evals/own-model/assertions/pii-sensitive.mjs
@@ -0,0 +1,19 @@
+/**
+ * Promptfoo assertion: at least three `sensitive: true` flags must appear in
+ * the output (the scenario's email, phone and SSN fields). Partial credit is
+ * given as found/3 when fewer are present.
+ */
+export default function (output, context) {
+  const REQUIRED = 3;
+  const flagCount = (output.match(/sensitive:\s*true/g) || []).length;
+
+  if (flagCount < REQUIRED) {
+    return {
+      pass: false,
+      score: flagCount / REQUIRED,
+      reason: `Expected at least 3 sensitive: true flags, found ${flagCount}`,
+    };
+  }
+
+  return { pass: true, score: 1, reason: `Found ${flagCount} sensitive flags` };
+}
diff --git a/evals/own-model/assertions/prompt-has-sections.mjs b/evals/own-model/assertions/prompt-has-sections.mjs
new file mode 100644
index 0000000..cda19a9
--- /dev/null
+++ b/evals/own-model/assertions/prompt-has-sections.mjs
@@ -0,0 +1,35 @@
+/**
+ * Promptfoo assertion: a generated customPrompt must contain the expected
+ * structural sections.
+ *
+ * config.sections: string[] — regex sources, one per required section; each
+ * is matched case-insensitively anywhere in the output. When omitted, three
+ * broad defaults are used (domain/role wording, component wording, and
+ * sensitive-data wording).
+ *
+ * The score is the fraction of section patterns that matched.
+ */
+export default function (output, { config }) {
+  const patterns = config?.sections || [
+    'domain|workflow|role|assist',
+    'form|component|field',
+    'sensitive|pii|personal',
+  ];
+
+  const haystack = output.toLowerCase();
+  const unmatched = patterns.filter((source) => !new RegExp(source, 'i').test(haystack));
+
+  if (unmatched.length > 0) {
+    return {
+      pass: false,
+      score: (patterns.length - unmatched.length) / patterns.length,
+      reason: `Missing section markers: ${unmatched.join(', ')}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `All ${patterns.length} expected section markers found`,
+  };
+}
diff --git a/evals/own-model/assertions/prompt-length.mjs b/evals/own-model/assertions/prompt-length.mjs
new file mode 100644
index 0000000..088cafe
--- /dev/null
+++ b/evals/own-model/assertions/prompt-length.mjs
@@ -0,0 +1,38 @@
+/**
+ * Promptfoo assertion: the generated customPrompt must have a sensible length.
+ *
+ * config.min: minimum chars (default 200)
+ * config.max: maximum chars (default 8000)
+ *
+ * Out-of-range outputs get partial credit proportional to how close they are
+ * to the violated bound (len/min when too short, max/len when too long).
+ */
+export default function (output, { config }) {
+  const lower = config?.min ?? 200;
+  const upper = config?.max ?? 8000;
+  const size = output.length;
+
+  const tooShort = size < lower;
+  const tooLong = !tooShort && size > upper;
+
+  if (tooShort) {
+    return {
+      pass: false,
+      score: size / lower,
+      reason: `Output too short: ${size} chars (minimum ${lower})`,
+    };
+  }
+  if (tooLong) {
+    return {
+      pass: false,
+      score: upper / size,
+      reason: `Output too long: ${size} chars (maximum ${upper})`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `Output length ${size} chars (within ${lower}-${upper})`,
+  };
+}
diff --git a/evals/own-model/assertions/respects-flow-order.mjs b/evals/own-model/assertions/respects-flow-order.mjs
new file mode 100644
index 0000000..73ab932
--- /dev/null
+++ b/evals/own-model/assertions/respects-flow-order.mjs
@@ -0,0 +1,71 @@
+/**
+ * Asserts that the generated customPrompt respects the multi-step flow order.
+ * Checks that step labels/numbers appear in sequence in the output.
+ *
+ * config.stepLabels: string[] — ordered labels to check sequence
+ * config.minSteps: number — minimum number of distinct steps expected (default: 2)
+ *
+ * Steps 1–10 are recognised when written as "step N", "phase N" or a bold
+ * "**N." / "**N)" marker. The final score averages the share of labels found
+ * with an ordering score (1 when steps first appear in ascending order, 0.5
+ * otherwise); only the presence of labels is checked, not their order.
+ *
+ * @param {string} output - Generated customPrompt text.
+ * @param {{ config?: { stepLabels?: string[], minSteps?: number } }} context
+ * @returns {{ pass: boolean, score: number, reason: string }}
+ */
+export default function respectsFlowOrder(output, { config }) {
+  const lower = output.toLowerCase();
+  const minSteps = config?.minSteps || 2;
+
+  // Record each numbered step together with the offset of its first mention,
+  // whichever notation it uses, so ordering is judged on the real positions.
+  const steps = [];
+  for (let i = 1; i <= 10; i++) {
+    const patterns = [
+      new RegExp(`step\\s*${i}\\b`, 'i'),
+      new RegExp(`phase\\s*${i}\\b`, 'i'),
+      new RegExp(`\\*\\*${i}[\\.\\)]`, 'i'),
+    ];
+    const offsets = patterns.map((p) => output.search(p)).filter((pos) => pos >= 0);
+    if (offsets.length > 0) {
+      steps.push({ number: i, pos: Math.min(...offsets) });
+    }
+  }
+
+  if (steps.length < minSteps) {
+    // Fall back to checking for sequential language
+    const sequentialMarkers = [/first|initial|begin/, /then|next|after|subsequent|once.*submit/];
+    const foundSequential = sequentialMarkers.filter((m) => m.test(lower)).length;
+    if (foundSequential >= minSteps) {
+      return {
+        pass: true,
+        score: 0.8,
+        reason: `Found sequential flow language (${foundSequential} markers) but no explicit step numbers`,
+      };
+    }
+    return {
+      pass: false,
+      score: steps.length / minSteps,
+      reason: `Found only ${steps.length} step references, expected at least ${minSteps}`,
+    };
+  }
+
+  // Steps are collected in ascending number order, so the flow is in order
+  // exactly when their first mentions are in ascending position too.
+  const inOrder = steps.every((step, idx) => idx === 0 || steps[idx - 1].pos <= step.pos);
+
+  // Check step labels if provided
+  const stepLabels = config?.stepLabels || [];
+  const labelsFound = stepLabels.filter((label) => lower.includes(label.toLowerCase())).length;
+
+  const labelScore = stepLabels.length > 0 ? labelsFound / stepLabels.length : 1;
+  const orderScore = inOrder ? 1 : 0.5;
+  const score = (labelScore + orderScore) / 2;
+
+  return {
+    pass: score >= 0.5,
+    score,
+    reason: `Found ${steps.length} steps (${inOrder ? 'in order' : 'out of order'})${stepLabels.length > 0 ? `, ${labelsFound}/${stepLabels.length} labels matched` : ''}`,
+  };
+}
diff --git a/evals/own-model/assertions/select-has-options.mjs b/evals/own-model/assertions/select-has-options.mjs
new file mode 100644
index 0000000..c4e967a
--- /dev/null
+++ b/evals/own-model/assertions/select-has-options.mjs
@@ -0,0 +1,13 @@
+/**
+ * Promptfoo assertion: the output must contain both a `type: select` field
+ * and an `options:` key. The two are checked independently as substrings.
+ */
+export default function (output) {
+  if (!output.includes('type: select')) {
+    return { pass: false, score: 0, reason: 'Missing type: select' };
+  }
+  if (!output.includes('options:')) {
+    return { pass: false, score: 0, reason: 'Missing options array' };
+  }
+  return { pass: true, score: 1, reason: 'Select field has options' };
+}
diff --git a/evals/own-model/assertions/table-features.mjs b/evals/own-model/assertions/table-features.mjs
new file mode 100644
index 0000000..b9496bb
--- /dev/null
+++ b/evals/own-model/assertions/table-features.mjs
@@ -0,0 +1,25 @@
+/**
+ * Promptfoo assertion: the output must contain a table component with at
+ * least one data feature enabled (`sortable: true` or `filterable: true`).
+ * A table without either feature earns half credit.
+ */
+export default function (output) {
+  if (!output.includes('type: table')) {
+    return {
+      pass: false,
+      score: 0,
+      reason: 'Expected table with sortable/filterable. No table found',
+    };
+  }
+
+  const enabled = ['sortable', 'filterable'].filter((name) => output.includes(`${name}: true`));
+  if (enabled.length === 0) {
+    return {
+      pass: false,
+      score: 0.5,
+      reason: 'Expected table with sortable/filterable. Missing data features',
+    };
+  }
+
+  return { pass: true, score: 1, reason: `Table with ${enabled.join(' and ')} found` };
+}
diff --git a/evals/own-model/assertions/thinking-first.mjs b/evals/own-model/assertions/thinking-first.mjs
new file mode 100644
index 0000000..27c14ae
--- /dev/null
+++ b/evals/own-model/assertions/thinking-first.mjs
@@ -0,0 +1,22 @@
+/**
+ * Asserts that the first mdma block is a thinking component.
+ *
+ * @param {string} output - Raw model response.
+ * @returns {{ pass: boolean, score: number, reason: string }} Fails when the
+ *   response has no mdma block or its first block is not `type: thinking`.
+ */
+export default function thinkingFirst(output) {
+  // Tolerate trailing spaces / CR after the fence tag so CRLF output is seen.
+  const blocks = [...output.matchAll(/```mdma[^\S\n]*\n([\s\S]*?)```/g)];
+  if (blocks.length === 0) {
+    return { pass: false, score: 0, reason: 'No mdma blocks found' };
+  }
+  const firstBlock = blocks[0][1];
+  // Match the `type:` key at the start of a line rather than the bare substring,
+  // so text that merely mentions "type: thinking" inside another component's
+  // content does not count.
+  if (/^\s*type:\s*thinking\b/m.test(firstBlock)) {
+    return { pass: true, score: 1, reason: 'Thinking block is first' };
+  }
+  return { pass: false, score: 0, reason: 'First mdma block is not a thinking component' };
+}
diff --git a/evals/own-model/assertions/unique-kebab-ids.mjs b/evals/own-model/assertions/unique-kebab-ids.mjs
new file mode 100644
index 0000000..dd9e97b
--- /dev/null
+++ b/evals/own-model/assertions/unique-kebab-ids.mjs
@@ -0,0 +1,39 @@
+/**
+ * Asserts that all component IDs are unique and follow kebab-case.
+ *
+ * IDs are taken from every line that starts with `id:` at column 0.
+ *
+ * @param {string} output - Raw model response.
+ * @returns {{ pass: boolean, score: number, reason: string }} Fails when no ID
+ *   is present, when an ID occurs more than once, or when an ID is not kebab-case.
+ */
+export default function uniqueKebabIds(output) {
+  const ids = [...output.matchAll(/^id:\s*(.+)$/gm)].map((m) => m[1].trim());
+
+  if (ids.length === 0) {
+    return { pass: false, score: 0, reason: 'No component IDs found' };
+  }
+
+  // Name only the IDs that actually repeat (each once), not the whole list.
+  const seen = new Set();
+  const duplicates = new Set();
+  for (const id of ids) {
+    if (seen.has(id)) duplicates.add(id);
+    seen.add(id);
+  }
+  if (duplicates.size > 0) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `Duplicate IDs found: ${[...duplicates].join(', ')}`,
+    };
+  }
+
+  const kebab = /^[a-z][a-z0-9]*(-[a-z0-9]+)*$/;
+  const nonKebab = ids.filter((id) => !kebab.test(id));
+  if (nonKebab.length > 0) {
+    return { pass: false, score: 0, reason: `Non-kebab-case IDs: ${nonKebab.join(', ')}` };
+  }
+
+  return { pass: true, score: 1, reason: `${ids.length} unique kebab-case IDs` };
+}
diff --git a/evals/own-model/assertions/validate-mdma-examples.mjs b/evals/own-model/assertions/validate-mdma-examples.mjs
new file mode 100644
index 0000000..0101755
--- /dev/null
+++ b/evals/own-model/assertions/validate-mdma-examples.mjs
@@ -0,0 +1,54 @@
+import { validate } from '@mobile-reality/mdma-validator';
+
+/**
+ * Promptfoo assertion: every ```mdma block embedded in a customPrompt must be
+ * valid MDMA when validated on its own.
+ *
+ * validate-mdma.mjs validates the whole output as one document; this
+ * assertion instead targets mdma blocks used as examples inside instructional
+ * prose. Each block is re-wrapped in its own fence and validated separately
+ * (thinking-block rule excluded, auto-fix disabled). An output without any
+ * mdma block passes. The score is the fraction of blocks that validated.
+ */
+export default function (output) {
+  const examples = Array.from(output.matchAll(/```mdma\n([\s\S]*?)```/g), (m) => m[1].trim());
+
+  if (examples.length === 0) {
+    return {
+      pass: true,
+      score: 1,
+      reason: 'No mdma example blocks to validate (OK for customPrompt)',
+    };
+  }
+
+  const failures = [];
+  examples.forEach((yaml, index) => {
+    // Rebuild a standalone markdown document around the block's content.
+    const result = validate(['```mdma', yaml, '```'].join('\n'), {
+      exclude: ['thinking-block'],
+      autoFix: false,
+    });
+    if (result.ok) return;
+
+    const details = result.issues
+      .filter((issue) => issue.severity === 'error')
+      .map((issue) => `[${issue.ruleId}] ${issue.message}`)
+      .join('; ');
+    failures.push(`Block ${index + 1}: ${details}`);
+  });
+
+  const validCount = examples.length - failures.length;
+  if (failures.length > 0) {
+    return {
+      pass: false,
+      score: validCount / examples.length,
+      reason: `${failures.length}/${examples.length} mdma block(s) have validation errors:\n${failures.join('\n')}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `All ${validCount} mdma example block(s) are valid MDMA`,
+  };
+}
diff --git a/evals/own-model/assertions/validate-mdma.mjs b/evals/own-model/assertions/validate-mdma.mjs
new file mode 100644
index 0000000..6c84b64
--- /dev/null
+++ b/evals/own-model/assertions/validate-mdma.mjs
@@ -0,0 +1,43 @@
+import { validate } from '@mobile-reality/mdma-validator';
+
+/**
+ * Promptfoo assertion: run the MDMA validator over the whole LLM output.
+ *
+ * Passes when the validator result is `ok`; otherwise the reason lists every
+ * error-severity issue as `[ruleId] message`. Auto-fix is disabled.
+ *
+ * Optional config:
+ *   - exclude: string[] — rule IDs to skip in addition to `thinking-block`,
+ *     which is always excluded. Intended for suites whose blueprints break a
+ *     stylistic rule on purpose (e.g. `flow-ordering` in the custom-prompt
+ *     suite, where prompts intentionally bundle multiple components per
+ *     message). A non-array value is ignored.
+ */
+export default function (output, { config } = {}) {
+  const exclude = ['thinking-block'];
+  if (Array.isArray(config?.exclude)) {
+    exclude.push(...config.exclude);
+  }
+
+  const { ok, issues, summary } = validate(output, { exclude, autoFix: false });
+
+  if (!ok) {
+    const errorLines = [];
+    for (const issue of issues) {
+      if (issue.severity === 'error') {
+        errorLines.push(`[${issue.ruleId}] ${issue.message}`);
+      }
+    }
+    return {
+      pass: false,
+      score: 0,
+      reason: `Validation failed with ${summary.errors} error(s):\n${errorLines.join('\n')}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `Valid MDMA document (${summary.warnings} warnings, ${summary.infos} info)`,
+  };
+}
diff --git a/evals/own-model/assertions/yaml-not-json.mjs b/evals/own-model/assertions/yaml-not-json.mjs
new file mode 100644
index 0000000..ef2655a
--- /dev/null
+++ b/evals/own-model/assertions/yaml-not-json.mjs
@@ -0,0 +1,48 @@
+/**
+ * Promptfoo assertion: every ```mdma block must be written as YAML, not JSON.
+ *
+ * For each block (trimmed) the following are reported:
+ * - content that starts with { or [
+ * - a quoted JSON key "type":, "fields": or "id": anywhere in the block
+ * - content that does not begin with a YAML `key: value` line
+ *
+ * Output without any mdma block passes.
+ */
+export default function (output) {
+  const contents = Array.from(output.matchAll(/```mdma\n([\s\S]*?)```/g), (m) => m[1].trim());
+
+  if (contents.length === 0) {
+    // A generated customPrompt may describe components without embedding
+    // code blocks, so having nothing to inspect is acceptable.
+    return { pass: true, score: 1, reason: 'No mdma blocks to check (OK for customPrompt)' };
+  }
+
+  // [predicate that detects the problem, description appended to the block label]
+  const checks = [
+    [(c) => c.startsWith('{') || c.startsWith('['), 'starts with JSON syntax'],
+    [(c) => /"type"\s*:/.test(c), 'contains JSON "type": key'],
+    [(c) => /"fields"\s*:/.test(c), 'contains JSON "fields": key'],
+    [(c) => /"id"\s*:/.test(c), 'contains JSON "id": key'],
+    [(c) => !/^[a-zA-Z_-]+:\s/.test(c), 'does not start with YAML key: value'],
+  ];
+
+  const problems = contents.flatMap((content, index) =>
+    checks
+      .filter(([detects]) => detects(content))
+      .map(([, description]) => `block ${index + 1}: ${description}`),
+  );
+
+  if (problems.length > 0) {
+    return {
+      pass: false,
+      score: 0,
+      reason: `JSON detected in mdma blocks:\n${problems.join('\n')}`,
+    };
+  }
+
+  return {
+    pass: true,
+    score: 1,
+    reason: `All ${contents.length} mdma block(s) use valid YAML syntax`,
+  };
+}
diff --git a/evals/own-model/prompt-author.mjs b/evals/own-model/prompt-author.mjs
new file mode 100644
index 0000000..925f8d1
--- /dev/null
+++ b/evals/own-model/prompt-author.mjs
@@ -0,0 +1,22 @@
+import { getAuthorPromptVariant } from '@mobile-reality/mdma-prompt-pack';
+
+/**
+ * Promptfoo prompt function — author suite for our model.
+ *
+ * DSL port of the flagship author suite (../tests.yaml). The system message
+ * is the `mobile-reality/mdma-il` author prompt from the prompt pack (DSL
+ * grammar + rules + form/table/chart examples); the user message is the
+ * scenario's DSL intent (`vars.request`, supplied by tests-author.yaml).
+ * Output is validated against the schema + the per-case structural
+ * assertions — pure DSL→MDMA generation, no customPrompt layer.
+ */
+const AUTHORING_SYSTEM_PROMPT = getAuthorPromptVariant('mobile-reality/mdma-il').prompt;
+
+/** Wraps text in a `{% raw %}` … `{% endraw %}` pair. */
+const raw = (text) => `{% raw %}${text}{% endraw %}`;
+
+export default function ({ vars }) {
+  const systemMessage = { role: 'system', content: raw(AUTHORING_SYSTEM_PROMPT) };
+  const userMessage = { role: 'user', content: raw(vars.request) };
+  return [systemMessage, userMessage];
+}
diff --git a/evals/own-model/prompt-custom.mjs b/evals/own-model/prompt-custom.mjs
new file mode 100644
index 0000000..c6cb1af
--- /dev/null
+++ b/evals/own-model/prompt-custom.mjs
@@ -0,0 +1,31 @@
+import { getAuthorPromptVariant } from '@mobile-reality/mdma-prompt-pack';
+
+/**
+ * Promptfoo prompt function — custom-system-prompt suite for our model.
+ *
+ * System message: the `mobile-reality/mdma-il` author prompt from the prompt
+ * pack (DSL grammar + rules + form/table/chart few-shot examples) as the
+ * author base, followed — when the test provides one — by a `---` separator
+ * and the test's `customPrompt` (the scenario intent in DSL). User message:
+ * the NL `request`.
+ *
+ * NOTE: we deliberately do NOT use buildSystemPrompt() here. Its generic
+ * "Reminder" footer is flagship/agentic guidance ("respond in plain Markdown",
+ * "the user should see a natural response") that pushes our DSL-tuned model to
+ * reply in prose with NO ```mdma blocks — measured ~60% of the time on the
+ * heaviest multi-component flow (5/5 pass with this lean join vs 2/5 with the
+ * footer). The authoring prompt already carries every rule our model needs.
+ */
+const AUTHORING_SYSTEM_PROMPT = getAuthorPromptVariant('mobile-reality/mdma-il').prompt;
+
+export default function ({ vars }) {
+  let system = AUTHORING_SYSTEM_PROMPT;
+  if (vars.customPrompt) {
+    system = `${system}\n\n---\n\n${vars.customPrompt}`;
+  }
+
+  return [
+    { role: 'system', content: `{% raw %}${system}{% endraw %}` },
+    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` },
+  ];
+}
diff --git a/evals/own-model/prompt-fixer.mjs b/evals/own-model/prompt-fixer.mjs
new file mode 100644
index 0000000..30bfee8
--- /dev/null
+++ b/evals/own-model/prompt-fixer.mjs
@@ -0,0 +1,39 @@
+import {
+  buildFixerMessage,
+  buildFixerPrompt,
+  buildSystemPrompt,
+} from '@mobile-reality/mdma-prompt-pack';
+import { validate } from '@mobile-reality/mdma-validator';
+
+/**
+ * Promptfoo prompt function — fixer suite for our model.
+ *
+ * NOTE: this is OFF-CONTRACT for our model. The model was trained DSL→MDMA;
+ * the fixer task takes a BROKEN MDMA document (+ validator errors) and asks the
+ * model to repair it — not a DSL intent. We run it anyway as a capability probe:
+ * can the DSL-specialized model also repair MDMA, or does it refuse?
+ *
+ * Pipeline mirrors the flagship fixer eval (../prompt-fixer.mjs): run the
+ * validator to surface remaining issues, then send the canonical fixer system
+ * prompt (default author spec + fixer instructions) + the broken doc / issues.
+ */
+export default function ({ vars }) { // promptfoo prompt function: returns the [system, user] chat messages
+  const variantKey = vars.variantKey ?? 'single-block'; // fall back to 'single-block' when the test sets no variantKey
+  const exclude = ['thinking-block']; // passed to validate() as its `exclude` option
+  if (variantKey !== 'flow') exclude.push('flow-ordering'); // flow-ordering stays enabled only for the 'flow' variant
+
+  const result = validate(vars.brokenDocument, { exclude });
+  const allIssues = result.issues.filter((i) => i.severity === 'error' || i.severity === 'warning'); // keep errors and warnings only
+
+  const fixerPrompt = buildFixerPrompt(variantKey);
+  const systemPrompt = `${buildSystemPrompt()}\n\n---\n\n${fixerPrompt}`; // buildSystemPrompt() output, a `---` separator, then the fixer prompt
+  const userMessage = buildFixerMessage(vars.brokenDocument, allIssues, {
+    conversationHistory: vars.conversationHistory ?? undefined, // null is normalized to undefined
+    promptContext: vars.promptContext ?? undefined,
+  });
+
+  return [
+    { role: 'system', content: `{% raw %}${systemPrompt}{% endraw %}` },
+    { role: 'user', content: `{% raw %}${userMessage}{% endraw %}` },
+  ];
+}
diff --git a/evals/own-model/prompt-guidance.mjs b/evals/own-model/prompt-guidance.mjs
new file mode 100644
index 0000000..8468cdd
--- /dev/null
+++ b/evals/own-model/prompt-guidance.mjs
@@ -0,0 +1,25 @@
+/**
+ * Promptfoo prompt function — agent guidance suite for our model.
+ *
+ * Agentic tool-calling probe: the model is given the `generate_mdma` tool (in
+ * the provider config) and an NL request; it should CALL the tool for
+ * document-creation requests and NOT call it for conversational ones
+ * (asserted by calls-generate-mdma).
+ *
+ * ⚠️ Requires the endpoint to have function-calling enabled
+ * (vLLM `--enable-auto-tool-choice` + `--tool-call-parser`). Without it the
+ * endpoint returns HTTP 400 for `tool_choice: auto`.
+ */
+const SYSTEM_PROMPT =
+  'You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents ' +
+  '(forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). ' +
+  'Call `generate_mdma` whenever the user asks you to create, build, design, or update an ' +
+  'interactive document or UI. For greetings, questions about capabilities, explanations, or ' +
+  'other conversational replies, respond normally and do NOT call the tool.';
+
+export default function ({ vars }) { // promptfoo prompt function: returns the [system, user] chat messages
+  return [
+    { role: 'system', content: `{% raw %}${SYSTEM_PROMPT}{% endraw %}` }, // fixed tool-use instructions defined above
+    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` }, // the test's `request` var, wrapped in raw tags
+  ];
+}
diff --git a/evals/own-model/prompt.mjs b/evals/own-model/prompt.mjs
new file mode 100644
index 0000000..0447fdc
--- /dev/null
+++ b/evals/own-model/prompt.mjs
@@ -0,0 +1,21 @@
+import { getAuthorPromptVariant } from '@mobile-reality/mdma-prompt-pack';
+
+/**
+ * Promptfoo prompt function — MDMA-IL DSL holdout gate.
+ *
+ * System message = the `mobile-reality/mdma-il` author prompt (DSL grammar +
+ * worked examples), looked up directly from the registry, decoupled from the
+ * provider id. User message = the MDMA-IL DSL intent from each holdout case
+ * (`vars.request`, supplied by tests-dsl.mjs).
+ *
+ * Both are wrapped in {% raw %} so Nunjucks passes the DSL (and any `{...}`
+ * select-option braces) through verbatim.
+ */
+const SYSTEM_PROMPT = getAuthorPromptVariant('mobile-reality/mdma-il').prompt;
+
+export default function ({ vars }) { // promptfoo prompt function: returns the [system, user] chat messages
+  return [
+    { role: 'system', content: `{% raw %}${SYSTEM_PROMPT}{% endraw %}` }, // mdma-il author prompt looked up from the registry
+    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` }, // the holdout case's DSL intent; raw tags keep `{...}` braces verbatim
+  ];
+}
diff --git a/evals/own-model/promptfooconfig.own-model-author.yaml b/evals/own-model/promptfooconfig.own-model-author.yaml
new file mode 100644
index 0000000..c8608e9
--- /dev/null
+++ b/evals/own-model/promptfooconfig.own-model-author.yaml
@@ -0,0 +1,38 @@
+# MDMA Author — own model (DSL port of the flagship author suite)
+#
+# The 28 author scenarios from ../tests.yaml expressed as DSL intents
+# (tests-author.yaml), driven by our shared authoring system prompt
+# (prompt-author.mjs: system = authoring prompt, user = DSL). Output validated
+# against the schema + per-case structural assertions. DSL is the input.
+#
+# Run (serial): pnpm --filter @mobile-reality/mdma-evals eval:own-model:author
+
+description: MDMA Author Eval (DSL) — own model
+
+envPath: ../.env
+outputPath: own-model/results-author.json
+
+prompts:
+  - file://prompt-author.mjs
+
+providers:
+  - id: "{{ env.OWN_MODEL_PROVIDER or 'openai:chat:mdma-26b' }}"
+    config:
+      apiBaseUrl: "{{ env.OWN_MODEL_BASE_URL }}"
+      apiKey: "{{ env.OWN_MODEL_API_KEY }}"
+      temperature: 1
+      max_tokens: 2048
+      # Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md).
+      min_p: 0.02
+      repetition_penalty: 1.1
+      chat_template_kwargs:
+        enable_thinking: false
+
+defaultTest:
+  assert:
+    - type: javascript
+      value: file://assertions/validate-mdma.mjs
+      config:
+        exclude: [flow-ordering]
+
+tests: file://tests-author.yaml
diff --git a/evals/own-model/promptfooconfig.own-model-custom.yaml b/evals/own-model/promptfooconfig.own-model-custom.yaml
new file mode 100644
index 0000000..586a88a
--- /dev/null
+++ b/evals/own-model/promptfooconfig.own-model-custom.yaml
@@ -0,0 +1,45 @@
+# MDMA Author + Custom System Prompt — own model (MDMA-IL)
+#
+# Mirrors the flagship models' custom suite (promptfooconfig.custom.yaml): the
+# mobile-reality/mdma-il author prompt + the test's customPrompt joined into the
+# SYSTEM message by prompt-custom.mjs (a plain join, NOT buildSystemPrompt), the
+# NL request as the user message, output validated against the schema. Only the
+# provider (our 31B), the author variant, and the system-prompt join differ.
+#
+# Sampling: temperature 1 is set explicitly below (the model should stay
+# conversational and still produce MDMA). enable_thinking=false per the endpoint contract.
+#
+# Run (serial): pnpm --filter @mobile-reality/mdma-evals eval:own-model:custom
+
+description: MDMA Author + Custom System Prompt Eval — own model
+
+envPath: ../.env
+outputPath: own-model/results-custom.json
+
+prompts:
+  - file://prompt-custom.mjs
+
+providers:
+  - id: "{{ env.OWN_MODEL_PROVIDER or 'openai:chat:mdma-31b' }}"
+    config:
+      apiBaseUrl: "{{ env.OWN_MODEL_BASE_URL }}"
+      apiKey: "{{ env.OWN_MODEL_API_KEY }}"
+      # AWQ endpoint: max_model_len 16384, so no tight output cap needed.
+      # temperature 1 (conversational sampling) — non-deterministic, results may
+      # vary run-to-run.
+      temperature: 1
+      max_tokens: 4096
+      # Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md).
+      min_p: 0.02
+      repetition_penalty: 1.1
+      chat_template_kwargs:
+        enable_thinking: false
+
+defaultTest:
+  assert:
+    - type: javascript
+      value: file://assertions/validate-mdma.mjs
+      config:
+        exclude: [flow-ordering]
+
+tests: file://tests-custom.yaml
diff --git a/evals/own-model/promptfooconfig.own-model-fixer.yaml b/evals/own-model/promptfooconfig.own-model-fixer.yaml
new file mode 100644
index 0000000..99bc4cf
--- /dev/null
+++ b/evals/own-model/promptfooconfig.own-model-fixer.yaml
@@ -0,0 +1,30 @@
+# MDMA Fixer — own model (capability probe)
+#
+# OFF-CONTRACT for our DSL→MDMA model: the input is a broken MDMA document to
+# repair, not a DSL intent. Run as a probe — can the model fix MDMA, or refuse?
+# Reuses the flagship fixer test set (../tests-fixer.yaml) and its assertions.
+#
+# Run (serial): pnpm --filter @mobile-reality/mdma-evals eval:own-model:fixer
+
+description: MDMA Fixer Eval (capability probe) — own model
+
+envPath: ../.env
+outputPath: own-model/results-fixer.json
+
+prompts:
+  - file://prompt-fixer.mjs
+
+providers:
+  - id: "{{ env.OWN_MODEL_PROVIDER or 'openai:chat:mdma-26b' }}"
+    config:
+      apiBaseUrl: "{{ env.OWN_MODEL_BASE_URL }}"
+      apiKey: "{{ env.OWN_MODEL_API_KEY }}"
+      temperature: 1
+      max_tokens: 2048
+      # Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md).
+      min_p: 0.02
+      repetition_penalty: 1.1
+      chat_template_kwargs:
+        enable_thinking: false
+
+tests: ../tests-fixer.yaml
diff --git a/evals/own-model/promptfooconfig.own-model-flows.yaml b/evals/own-model/promptfooconfig.own-model-flows.yaml
new file mode 100644
index 0000000..b255579
--- /dev/null
+++ b/evals/own-model/promptfooconfig.own-model-flows.yaml
@@ -0,0 +1,38 @@
+# MDMA Example Flows — own model (DSL port of the flagship flows suite)
+#
+# The 15 example-flow scenarios from ../tests-flows.yaml with each customPrompt
+# expressed in DSL (tests-flows.yaml). Reuses the custom builder (prompt-custom.mjs:
+# authoring prompt + customPrompt + NL request). Output validated against the
+# schema + per-case structural assertions.
+#
+# Run (serial): pnpm --filter @mobile-reality/mdma-evals eval:own-model:flows
+
+description: MDMA Example Flows Eval (DSL) — own model
+
+envPath: ../.env
+outputPath: own-model/results-flows.json
+
+prompts:
+  - file://prompt-custom.mjs
+
+providers:
+  - id: "{{ env.OWN_MODEL_PROVIDER or 'openai:chat:mdma-26b' }}"
+    config:
+      apiBaseUrl: "{{ env.OWN_MODEL_BASE_URL }}"
+      apiKey: "{{ env.OWN_MODEL_API_KEY }}"
+      temperature: 1
+      max_tokens: 2048
+      # Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md).
+      min_p: 0.02
+      repetition_penalty: 1.1
+      chat_template_kwargs:
+        enable_thinking: false
+
+defaultTest:
+  assert:
+    - type: javascript
+      value: file://assertions/validate-mdma.mjs
+      config:
+        exclude: [flow-ordering]
+
+tests: file://tests-flows.yaml
diff --git a/evals/own-model/promptfooconfig.own-model-guidance.yaml b/evals/own-model/promptfooconfig.own-model-guidance.yaml
new file mode 100644
index 0000000..79074ca
--- /dev/null
+++ b/evals/own-model/promptfooconfig.own-model-guidance.yaml
@@ -0,0 +1,52 @@
+# MDMA Agent Guidance — own model (agentic tool-calling)
+#
+# Tests whether the model correctly CALLS the generate_mdma tool for
+# document-creation requests (and not for conversational ones). DSL port not
+# applicable — this is a tool-calling decision, driven by NL requests
+# (../tests-guidance.yaml).
+#
+# ⚠️ REQUIRES the endpoint to have function-calling enabled (vLLM
+# --enable-auto-tool-choice + --tool-call-parser). Until then the endpoint
+# returns HTTP 400 for tool_choice: auto.
+#
+# Run (serial): pnpm --filter @mobile-reality/mdma-evals eval:own-model:guidance
+
+description: MDMA Agent Guidance Eval (tool-calling) — own model
+
+envPath: ../.env
+outputPath: own-model/results-guidance.json
+
+prompts:
+  - file://prompt-guidance.mjs
+
+providers:
+  - id: "{{ env.OWN_MODEL_PROVIDER or 'openai:chat:mdma-26b' }}"
+    config:
+      apiBaseUrl: "{{ env.OWN_MODEL_BASE_URL }}"
+      apiKey: "{{ env.OWN_MODEL_API_KEY }}"
+      temperature: 1
+      max_tokens: 1024
+      # Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md).
+      min_p: 0.02
+      repetition_penalty: 1.1
+      chat_template_kwargs:
+        enable_thinking: false
+      tool_choice: auto
+      tools:
+        - type: function
+          function:
+            name: generate_mdma
+            description: >
+              Generate an MDMA Markdown document to present structured
+              interactive content to the user (forms, tables, tasklists,
+              approval gates, charts, callouts, buttons, webhooks).
+            parameters:
+              type: object
+              properties:
+                document:
+                  type: string
+                  description: The complete MDMA Markdown document.
+              required:
+                - document
+
+tests: ../tests-guidance.yaml
diff --git a/evals/own-model/promptfooconfig.own-model.yaml b/evals/own-model/promptfooconfig.own-model.yaml
new file mode 100644
index 0000000..29ef88e
--- /dev/null
+++ b/evals/own-model/promptfooconfig.own-model.yaml
@@ -0,0 +1,52 @@
+# MDMA-IL DSL holdout gate — Mobile Reality's own model
+#
+# Our self-hosted model (Gemma 4 + v3 MDMA-IL LoRA; default provider below is
+# mdma-31b) takes ONE MDMA-IL DSL intent and returns an MDMA document. This
+# suite is the DSL holdout gate: feed the 95 held-out scenarios in DSL form
+# (../gemma/dataset/data/holdout-dsl.jsonl via tests-dsl.mjs) and validate the
+# MDMA output.
+#
+# The system prompt is the mobile-reality/mdma-il author prompt (DSL grammar +
+# worked examples). The 2048-token context cap is E4B-only. See README.md.
+#
+# Model is plugged in via OWN_MODEL_* in ../.env (OpenAI-compatible endpoint).
+#
+# Run:  pnpm --filter @mobile-reality/mdma-evals eval:own-model
+# View: pnpm --filter @mobile-reality/mdma-evals eval:view
+
+description: MDMA-IL DSL Holdout Gate — own model
+
+envPath: ../.env
+outputPath: own-model/results.json
+
+prompts:
+  - file://prompt.mjs
+
+providers:
+  # OpenAI-compatible DSL endpoint (31B mdma-31b). Set OWN_MODEL_* in ../.env.
+  # Contract: v3 system prompt verbatim + DSL user message; enable_thinking=false
+  # (else thinking leaks in). NOTE(review): contract said temperature 0; config uses 1.
+  - id: "{{ env.OWN_MODEL_PROVIDER or 'openai:chat:mdma-31b' }}"
+    config:
+      apiBaseUrl: "{{ env.OWN_MODEL_BASE_URL }}"
+      apiKey: "{{ env.OWN_MODEL_API_KEY }}"
+      temperature: 1
+      # 31B has a large context (no 2048 cap like E4B); 1024 truncated big
+      # multi-component docs (callout + full data table), so raised to 2048.
+      max_tokens: 2048
+      # Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md).
+      min_p: 0.02
+      repetition_penalty: 1.1
+      chat_template_kwargs:
+        enable_thinking: false
+
+defaultTest:
+  assert:
+    # The gate: every output must be a valid MDMA document. thinking-block is
+    # always excluded by the assertion; the holdout has no multi-step flows.
+    - type: javascript
+      value: file://assertions/validate-mdma.mjs
+      config:
+        exclude: [flow-ordering]
+
+tests: file://tests-dsl.mjs
diff --git a/evals/own-model/results-author.json b/evals/own-model/results-author.json
new file mode 100644
index 0000000..5318acc
--- /dev/null
+++ b/evals/own-model/results-author.json
@@ -0,0 +1,5273 @@
+{
+  "evalId": "eval-7qF-2026-06-30T12:37:01",
+  "results": {
+    "version": 3,
+    "timestamp": "2026-06-30T12:37:01.882Z",
+    "prompts": [
+      {
+        "raw": "function ({ vars }) {\n  return [\n    { role: 'system', content: `{% raw %}${AUTHORING_SYSTEM_PROMPT}{% endraw %}` },\n    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` },\n  ];\n}",
+        "label": "own-model/prompt-author.mjs",
+        "config": {},
+        "id": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "provider": "openai:chat:mdma-26b",
+        "metrics": {
+          "score": 28,
+          "testPassCount": 28,
+          "testFailCount": 0,
+          "testErrorCount": 0,
+          "assertPassCount": 92,
+          "assertFailCount": 0,
+          "totalLatencyMs": 47883,
+          "tokenUsage": {
+            "prompt": 40987,
+            "completion": 4609,
+            "cached": 0,
+            "total": 45596,
+            "numRequests": 28,
+            "completionDetails": {
+              "reasoning": 0,
+              "acceptedPrediction": 0,
+              "rejectedPrediction": 0,
+              "cacheReadInputTokens": 0,
+              "cacheCreationInputTokens": 0
+            },
+            "assertions": {
+              "total": 0,
+              "prompt": 0,
+              "completion": 0,
+              "cached": 0,
+              "numRequests": 0,
+              "completionDetails": {
+                "reasoning": 0,
+                "acceptedPrediction": 0,
+                "rejectedPrediction": 0,
+                "cacheReadInputTokens": 0,
+                "cacheCreationInputTokens": 0
+              }
+            }
+          },
+          "namedScores": {},
+          "namedScoresCount": {},
+          "namedScoreWeights": {},
+          "cost": 0
+        }
+      }
+    ],
+    "results": [
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 3 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 3
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "b74a68fa-01bd-47df-b639-ac9aa7f8be81",
+        "latencyMs": 2241,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Contact form: requires name, sensitive email, and a message; submits via contact-submitted.\n```\n\n```mdma\ntype: form\nid: contact-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: message\n    type: textarea\n    label: \"Message\"\n    required: true\nonSubmit: contact-submitted\n```",
+          "tokenUsage": {
+            "total": 1617,
+            "prompt": 1467,
+            "completion": 150,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2241,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1159",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:01 GMT",
+                "modal-function-call-id": "fc-01KWC8GVQXS7GY3T1FADQGTC4K",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Contact form (DSL)",
+          "vars": {
+            "request": "form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 3
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 0,
+        "vars": {
+          "request": "form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1159",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:01 GMT",
+              "modal-function-call-id": "fc-01KWC8GVQXS7GY3T1FADQGTC4K",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 4 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 4
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 sensitive flags",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pii-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "388d4b57-5d17-4725-80cc-7d8a786522ec",
+        "latencyMs": 2465,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Employee PII Collection Form: requires full name, sensitive email, sensitive phone number, and sensitive SSN; submits via employee-pii-submitted.\n```\n\n```mdma\ntype: form\nid: employee-pii-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: employee-pii-submitted\n```",
+          "tokenUsage": {
+            "total": 1682,
+            "prompt": 1481,
+            "completion": 201,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2465,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1342",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:03 GMT",
+                "modal-function-call-id": "fc-01KWC8GXRDAYT7T8W2XYC12P87",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Employee PII form, sensitive fields (DSL)",
+          "vars": {
+            "request": "form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 4
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pii-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 1,
+        "vars": {
+          "request": "form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1342",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:03 GMT",
+              "modal-function-call-id": "fc-01KWC8GXRDAYT7T8W2XYC12P87",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "callout"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "a9060477-3822-41fa-8c00-c932b680f3c9",
+        "latencyMs": 1548,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"callout#maintenance-warning(variant=warning)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  The user requested a maintenance warning callout with a warning variant. I will provide a single callout component.\n```\n\n```mdma\ntype: callout\nid: maintenance-warning\nvariant: warning\ncontent: |\n  Scheduled system maintenance is occurring on Saturday at 02:00 UTC. Users may experience intermittent connectivity or momentary outages during this window. We apologize for the inconvenience.\n```",
+          "tokenUsage": {
+            "total": 1561,
+            "prompt": 1450,
+            "completion": 111,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1548,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1117",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:06 GMT",
+                "modal-function-call-id": "fc-01KWC8H06HQVEPKNYM3V9W241Y",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Maintenance callout (DSL)",
+          "vars": {
+            "request": "callout#maintenance-warning(variant=warning)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "callout"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 2,
+        "vars": {
+          "request": "callout#maintenance-warning(variant=warning)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1117",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:06 GMT",
+              "modal-function-call-id": "fc-01KWC8H06HQVEPKNYM3V9W241Y",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "```markdown"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: button",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "button"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "40343da3-5245-45a1-bdd7-01cd6b444ccc",
+        "latencyMs": 700,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"button#submit-report-btn(text=\\\"Submit Report\\\", action=submit-report, variant=primary)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: button\nid: submit-report-btn\ntext: \"Submit Report\"\nonAction: submit-report\nvariant: primary\n```",
+          "tokenUsage": {
+            "total": 1498,
+            "prompt": 1462,
+            "completion": 36,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 700,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "748",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:07 GMT",
+                "modal-function-call-id": "fc-01KWC8H1R00RDPA0J8K4JVHVAT",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Submit report button (DSL)",
+          "vars": {
+            "request": "button#submit-report-btn(text=\"Submit Report\", action=submit-report, variant=primary)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "not-contains",
+              "value": "```markdown"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "button"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 3,
+        "vars": {
+          "request": "button#submit-report-btn(text=\"Submit Report\", action=submit-report, variant=primary)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "748",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:07 GMT",
+              "modal-function-call-id": "fc-01KWC8H1R00RDPA0J8K4JVHVAT",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            }
+          ]
+        },
+        "id": "c2a408e0-76cb-4c8d-8e12-11bdb0ef1721",
+        "latencyMs": 2342,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating a single-select country selector form with four options; submits on the country-form-submitted action.\n```\n\n```mdma\ntype: form\nid: country-selector-form\nfields:\n  - name: country\n    type: select\n    label: \"Country\"\n    required: true\n    options:\n      - label: \"United States\"\n        value: \"us\"\n      - label: \"United Kingdom\"\n        value: \"uk\"\n      - label: \"Canada\"\n        value: \"ca\"\n      - label: \"Australia\"\n        value: \"au\"\n      - label: \"Germany\"\n        value: \"de\"\nonSubmit: country-form-submitted\n```",
+          "tokenUsage": {
+            "total": 1654,
+            "prompt": 1470,
+            "completion": 184,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2342,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1294",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:08 GMT",
+                "modal-function-call-id": "fc-01KWC8H2FT0GRGZXCPQEWQ0C3V",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Country selector form (DSL)",
+          "vars": {
+            "request": "form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 4,
+        "vars": {
+          "request": "form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1294",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:08 GMT",
+              "modal-function-call-id": "fc-01KWC8H2FT0GRGZXCPQEWQ0C3V",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: table",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "table"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Table with sortable and filterable found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/table-features.mjs"
+              }
+            }
+          ]
+        },
+        "id": "5813070e-d3b7-4185-84f2-a6d9608060cf",
+        "latencyMs": 2918,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"table#languages-table\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Table component for a list of languages. I will define realistic columns for language name, code, and proficiency level.\n```\n\n```mdma\ntype: table\nid: languages-table\nsortable: true\nfilterable: true\ncolumns:\n  - key: name\n    header: \"Language\"\n    sortable: true\n  - key: code\n    header: \"Code\"\n  - key: proficiency\n    header: \"Proficiency\"\n    sortable: true\n  - key: native\n    header: \"Native\"\ndata:\n  - { name: \"English\", code: \"en\", proficiency: \"Fluent\", native: true }\n  - { name: \"Spanish\", code: \"es\", proficiency: \"Professional\", native: false }\n  - { name: \"Japanese\", code: \"ja\", proficiency: \"Limited Working\", native: false }\n  - { name: \"French\", code: \"fr\", proficiency: \"Elementary\", native: false }\n```",
+          "tokenUsage": {
+            "total": 1684,
+            "prompt": 1444,
+            "completion": 240,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2918,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1494",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:10 GMT",
+                "modal-function-call-id": "fc-01KWC8H4RZ8M87CKNM95VXQXPH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Languages table (DSL)",
+          "vars": {
+            "request": "table#languages-table\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "table"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/table-features.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 5,
+        "vars": {
+          "request": "table#languages-table\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1494",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:10 GMT",
+              "modal-function-call-id": "fc-01KWC8H4RZ8M87CKNM95VXQXPH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: chart",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "chart"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Bar chart variant found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/bar-chart.mjs"
+              }
+            }
+          ]
+        },
+        "id": "4921a874-5e35-4c62-91ed-66182ac428b4",
+        "latencyMs": 1714,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"chart#quarterly-sales(variant=bar)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Bar chart for quarterly sales revenue with realistic sample data.\n```\n\n```mdma\ntype: chart\nid: quarterly-sales\nvariant: bar\nlabel: \"Quarterly Sales Revenue\"\ndata: |\n  Quarter, Revenue\n  Q1, 125000\n  Q2, 142000\n  Q3, 138000\n  Q4, 165000\nxAxis: Quarter\n```",
+          "tokenUsage": {
+            "total": 1579,
+            "prompt": 1450,
+            "completion": 129,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1714,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "987",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:13 GMT",
+                "modal-function-call-id": "fc-01KWC8H7N0QJRSKE83PR272VT2",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Quarterly sales bar chart (DSL)",
+          "vars": {
+            "request": "chart#quarterly-sales(variant=bar)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "chart"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/bar-chart.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 6,
+        "vars": {
+          "request": "chart#quarterly-sales(variant=bar)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "987",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:13 GMT",
+              "modal-function-call-id": "fc-01KWC8H7N0QJRSKE83PR272VT2",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: approval-gate",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "approval-gate"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "899627b2-3f04-41de-a972-bcfa417c9f71",
+        "latencyMs": 936,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"approval-gate#document-approval\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Approval gate for document review; requires a title.\n```\n\n```mdma\ntype: approval-gate\nid: document-approval\ntitle: \"Document Review and Approval\"\n```",
+          "tokenUsage": {
+            "total": 1511,
+            "prompt": 1446,
+            "completion": 65,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 936,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "873",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:15 GMT",
+                "modal-function-call-id": "fc-01KWC8H9BCSPZ595DQ55Y1E1Q8",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Document approval gate (DSL)",
+          "vars": {
+            "request": "approval-gate#document-approval\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "approval-gate"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 7,
+        "vars": {
+          "request": "approval-gate#document-approval\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "873",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:15 GMT",
+              "modal-function-call-id": "fc-01KWC8H9BCSPZ595DQ55Y1E1Q8",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 3 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 3
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "329138f6-0bb5-4ba7-a32d-9ba5025662fc",
+        "latencyMs": 1962,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Triage form: collecting a required title, required sensitive reporter email, and a required severity selection; submits via triage-submitted.\n```\n\n```mdma\ntype: form\nid: severity-form\nfields:\n  - name: title\n    type: text\n    label: \"Issue Title\"\n    required: true\n  - name: reporter-email\n    type: email\n    label: \"Reporter Email\"\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - label: \"Critical\"\n        value: critical\n      - label: \"High\"\n        value: high\n      - label: \"Medium\"\n        value: medium\n      - label: \"Low\"\n        value: low\nonSubmit: triage-submitted\n```",
+          "tokenUsage": {
+            "total": 1691,
+            "prompt": 1477,
+            "completion": 214,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1962,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1433",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:16 GMT",
+                "modal-function-call-id": "fc-01KWC8HAA550G4G76F04BM9N0V",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Incident triage severity form (DSL)",
+          "vars": {
+            "request": "form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 3
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 8,
+        "vars": {
+          "request": "form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1433",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:16 GMT",
+              "modal-function-call-id": "fc-01KWC8HAA550G4G76F04BM9N0V",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "3 unique kebab-case IDs",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/unique-kebab-ids.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form, callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 components (min: 2)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/component-count.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "cc1501b0-6ffe-4981-ac56-76d1026c59ca",
+        "latencyMs": 1710,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#settings-form[display-name*:t, email*^:e](action=settings-saved)\\ncallout#settings-saved(variant=success)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  User settings form: captures display-name (required) and email (required, sensitive); submits via settings-saved. \n  A success callout will display once settings are saved.\n```\n\n```mdma\ntype: form\nid: settings-form\nfields:\n  - name: display-name\n    type: text\n    label: \"Display Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\nonSubmit: settings-saved\n```\n\n```mdma\ntype: callout\nid: settings-saved\nvariant: success\ncontent: \"Your preferences and contact information have been successfully updated.\"\n```",
+          "tokenUsage": {
+            "total": 1657,
+            "prompt": 1475,
+            "completion": 182,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1710,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1326",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:18 GMT",
+                "modal-function-call-id": "fc-01KWC8HC7SFXAPKG1BFA006HR9",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Settings form + saved callout (DSL)",
+          "vars": {
+            "request": "form#settings-form[display-name*:t, email*^:e](action=settings-saved)\ncallout#settings-saved(variant=success)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/unique-kebab-ids.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/component-count.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 9,
+        "vars": {
+          "request": "form#settings-form[display-name*:t, email*^:e](action=settings-saved)\ncallout#settings-saved(variant=success)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1326",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:18 GMT",
+              "modal-function-call-id": "fc-01KWC8HC7SFXAPKG1BFA006HR9",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Callout with variant: warning found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/callout-variant.mjs",
+                "config": {
+                  "variant": "warning"
+                }
+              }
+            }
+          ]
+        },
+        "id": "dd73a400-e033-43c4-abc4-ac17a7aa85a3",
+        "latencyMs": 980,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"callout#password-expiry(variant=warning)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Generate a single warning callout informing the user about upcoming password expiration.\n```\n\n```mdma\ntype: callout\nid: password-expiry\nvariant: warning\ncontent: |\n  Your password is set to expire in 7 days. Please update it soon to avoid any disruption to your account access.\n```",
+          "tokenUsage": {
+            "total": 1543,
+            "prompt": 1450,
+            "completion": 93,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 980,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1005",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:20 GMT",
+                "modal-function-call-id": "fc-01KWC8HDXJASCHD30ED7YDGR97",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Password-expiry warning callout (DSL)",
+          "vars": {
+            "request": "callout#password-expiry(variant=warning)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/callout-variant.mjs",
+              "config": {
+                "variant": "warning"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 10,
+        "vars": {
+          "request": "callout#password-expiry(variant=warning)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1005",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:20 GMT",
+              "modal-function-call-id": "fc-01KWC8HDXJASCHD30ED7YDGR97",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Callout with variant: error found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/callout-variant.mjs",
+                "config": {
+                  "variant": "error"
+                }
+              }
+            }
+          ]
+        },
+        "id": "4a9d46dc-bd18-4514-854c-b545a903d0cd",
+        "latencyMs": 1005,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"callout#payment-error(variant=error)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating an error callout to notify the user about a failed payment attempt.\n```\n\n```mdma\ntype: callout\nid: payment-error\nvariant: error\ncontent: \"The payment was declined. Please check your card details and try again, or use a different payment method.\"\n```",
+          "tokenUsage": {
+            "total": 1539,
+            "prompt": 1450,
+            "completion": 89,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1005,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "983",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:21 GMT",
+                "modal-function-call-id": "fc-01KWC8HEZ308BMSD07R69AK0TV",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Payment-error callout (DSL)",
+          "vars": {
+            "request": "callout#payment-error(variant=error)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/callout-variant.mjs",
+              "config": {
+                "variant": "error"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 11,
+        "vars": {
+          "request": "callout#payment-error(variant=error)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "983",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:21 GMT",
+              "modal-function-call-id": "fc-01KWC8HEZ308BMSD07R69AK0TV",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Callout with variant: success found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/callout-variant.mjs",
+                "config": {
+                  "variant": "success"
+                }
+              }
+            }
+          ]
+        },
+        "id": "85cc6e85-0bb8-45a0-bc85-d439cafee002",
+        "latencyMs": 953,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"callout#account-verified(variant=success)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Verified account callout; displays a success message to the user.\n```\n\n```mdma\ntype: callout\nid: account-verified\nvariant: success\ncontent: \"Your account has been successfully verified. You now have full access to all features.\"\n```",
+          "tokenUsage": {
+            "total": 1532,
+            "prompt": 1450,
+            "completion": 82,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 953,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "957",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:22 GMT",
+                "modal-function-call-id": "fc-01KWC8HFY1P03EB2EA7W2WNGV1",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Account-verified callout (DSL)",
+          "vars": {
+            "request": "callout#account-verified(variant=success)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/callout-variant.mjs",
+              "config": {
+                "variant": "success"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 12,
+        "vars": {
+          "request": "callout#account-verified(variant=success)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "957",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:22 GMT",
+              "modal-function-call-id": "fc-01KWC8HFY1P03EB2EA7W2WNGV1",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: chart",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "chart"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Pie chart variant found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pie-chart.mjs"
+              }
+            }
+          ]
+        },
+        "id": "3f040def-5226-4fdd-b7e8-9e5c8120a485",
+        "latencyMs": 1265,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"chart#browser-share(variant=pie)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating a pie chart for browser market share with realistic dummy data.\n```\n\n```mdma\ntype: chart\nid: browser-share\nvariant: pie\nlabel: \"Browser Market Share\"\ndata: |\n  Browser, Share\n  Chrome, 65\n  Safari, 18\n  Firefox, 4\n  Edge, 4\n  Opera, 2\n  Other, 7\n```",
+          "tokenUsage": {
+            "total": 1565,
+            "prompt": 1449,
+            "completion": 116,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1265,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "992",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:23 GMT",
+                "modal-function-call-id": "fc-01KWC8HGXF3CGYK45GVNGWAMAQ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Browser-share pie chart (DSL)",
+          "vars": {
+            "request": "chart#browser-share(variant=pie)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "chart"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pie-chart.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 13,
+        "vars": {
+          "request": "chart#browser-share(variant=pie)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "992",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:23 GMT",
+              "modal-function-call-id": "fc-01KWC8HGXF3CGYK45GVNGWAMAQ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: button",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "button"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "variant: danger"
+              }
+            }
+          ]
+        },
+        "id": "6aab8160-5d69-4d80-8a9d-151ab9612d38",
+        "latencyMs": 595,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"button#delete-account-btn(text=\\\"Delete Account\\\", action=delete-account, variant=danger)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: button\nid: delete-account-btn\ntext: \"Delete Account\"\nonAction: delete-account\nvariant: danger\n```",
+          "tokenUsage": {
+            "total": 1498,
+            "prompt": 1462,
+            "completion": 36,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 595,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "750",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:24 GMT",
+                "modal-function-call-id": "fc-01KWC8HJ3YV7FCSPNMARG9D1E0",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Delete-account danger button (DSL)",
+          "vars": {
+            "request": "button#delete-account-btn(text=\"Delete Account\", action=delete-account, variant=danger)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "button"
+                ]
+              }
+            },
+            {
+              "type": "contains",
+              "value": "variant: danger"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 14,
+        "vars": {
+          "request": "button#delete-account-btn(text=\"Delete Account\", action=delete-account, variant=danger)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "750",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:24 GMT",
+              "modal-function-call-id": "fc-01KWC8HJ3YV7FCSPNMARG9D1E0",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form, callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 components (min: 2)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/component-count.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "4e289f7f-af15-431d-a8b5-c0e69013055e",
+        "latencyMs": 1630,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\\ncallout#ticket-submitted(variant=success)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Support ticket creation: a form with a required subject and a required description; upon submission, it triggers ticket-submitted, followed by a success callout.\n```\n\n```mdma\ntype: form\nid: ticket-form\nfields:\n  - name: subject\n    type: text\n    label: \"Subject\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"Description\"\n    required: true\nonSubmit: ticket-submitted\n```\n\n```mdma\ntype: callout\nid: ticket-submitted\nvariant: success\ncontent: \"Your support ticket has been submitted successfully. A representative will be with you shortly.\"\n```",
+          "tokenUsage": {
+            "total": 1643,
+            "prompt": 1471,
+            "completion": 172,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1630,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1315",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:25 GMT",
+                "modal-function-call-id": "fc-01KWC8HJRRSJ2MZR7ZK510T39G",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Ticket form + submitted callout (DSL)",
+          "vars": {
+            "request": "form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\ncallout#ticket-submitted(variant=success)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/component-count.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 15,
+        "vars": {
+          "request": "form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\ncallout#ticket-submitted(variant=success)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1315",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:25 GMT",
+              "modal-function-call-id": "fc-01KWC8HJRRSJ2MZR7ZK510T39G",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: table",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "table"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Table with sortable and filterable found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/table-features.mjs"
+              }
+            }
+          ]
+        },
+        "id": "510ef276-c9c6-4cc9-baaf-2f1d7d418d9d",
+        "latencyMs": 2466,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"table#employee-directory\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Employee Directory table: a searchable/sortable list displaying name, department, and status.\n```\n\n```mdma\ntype: table\nid: employee-directory\nsortable: true\nfilterable: true\ncolumns:\n  - key: name\n    header: \"Employee Name\"\n    sortable: true\n  - key: department\n    header: \"Department\"\n    sortable: true\n  - key: role\n    header: \"Role\"\n  - key: status\n    header: \"Status\"\ndata:\n  - { name: \"Alice Johnson\", department: \"Engineering\", role: \"Frontend Lead\", status: \"Active\" }\n  - { name: \"Bob Smith\", department: \"Sales\", role: \"Account Executive\", status: \"Active\" }\n  - { name: \"Charlie Davis\", department: \"HR\", role: \"Generalist\", status: \"On Leave\" }\n  - { name: \"Diana Prince\", department: \"Engineering\", role: \"DevOps Engineer\", status: \"Active\" }\n  - { name: \"Edward Norton\", department: \"Marketing\", role: \"SEO Specialist\", status: \"Inactive\" }\n```",
+          "tokenUsage": {
+            "total": 1723,
+            "prompt": 1444,
+            "completion": 279,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2466,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1652",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:26 GMT",
+                "modal-function-call-id": "fc-01KWC8HMC9R4F20RG0CFWH9ATF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Employee directory table (DSL)",
+          "vars": {
+            "request": "table#employee-directory\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "table"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/table-features.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 16,
+        "vars": {
+          "request": "table#employee-directory\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1652",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:26 GMT",
+              "modal-function-call-id": "fc-01KWC8HMC9R4F20RG0CFWH9ATF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 5 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 5
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 sensitive flags",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pii-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "9b86a2f9-2c05-428d-90e4-ebe95feca0f1",
+        "latencyMs": 2187,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Payment form: collects cardholder name, card number, expiry, CVV, and billing address; uses action payment-submitted. PII sensitivity applied to card details and address.\n```\n\n```mdma\ntype: form\nid: payment-form\nfields:\n  - name: cardholder-name\n    type: text\n    label: \"Cardholder Name\"\n    required: true\n  - name: card-number\n    type: text\n    label: \"Card Number\"\n    required: true\n    sensitive: true\n  - name: expiry\n    type: text\n    label: \"Expiry (MM/YY)\"\n    required: true\n    sensitive: true\n  - name: cvv\n    type: text\n    label: \"CVV\"\n    required: true\n    sensitive: true\n  - name: billing-address\n    type: textarea\n    label: \"Billing Address\"\n    required: true\n    sensitive: true\nonSubmit: payment-submitted\n```",
+          "tokenUsage": {
+            "total": 1727,
+            "prompt": 1486,
+            "completion": 241,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2187,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1496",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:29 GMT",
+                "modal-function-call-id": "fc-01KWC8HPSNJ9DAHCJRBMFYD3SE",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Payment form, financial PII (DSL)",
+          "vars": {
+            "request": "form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 5
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pii-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 17,
+        "vars": {
+          "request": "form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1496",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:29 GMT",
+              "modal-function-call-id": "fc-01KWC8HPSNJ9DAHCJRBMFYD3SE",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 4 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 4
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "80cabe84-bd2e-4b2e-9723-e144863ab681",
+        "latencyMs": 1686,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  User registration form: requires a username, a sensitive email, and a sensitive password; includes an optional bio textarea. Submits via registration-submitted.\n```\n\n```mdma\ntype: form\nid: user-registration\nfields:\n  - name: username\n    type: text\n    label: \"Username\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: password\n    type: text\n    label: \"Password\"\n    required: true\n    sensitive: true\n  - name: bio\n    type: textarea\n    label: \"Biography\"\nonSubmit: registration-submitted\n```",
+          "tokenUsage": {
+            "total": 1653,
+            "prompt": 1471,
+            "completion": 182,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1686,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1324",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:31 GMT",
+                "modal-function-call-id": "fc-01KWC8HRYXG1CKQWMK7K85BSD9",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "User registration form (DSL)",
+          "vars": {
+            "request": "form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 4
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 18,
+        "vars": {
+          "request": "form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1324",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:31 GMT",
+              "modal-function-call-id": "fc-01KWC8HRYXG1CKQWMK7K85BSD9",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: tasklist, button",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "tasklist",
+                    "button"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "fe6cf157-7351-4712-b56a-aabf0fa5dd70",
+        "latencyMs": 1711,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"tasklist#onboarding-checklist\\nbutton#get-started-btn(text=\\\"Get Started\\\", action=onboarding-done, variant=primary)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Designing an onboarding experience: a tasklist to track onboarding steps and a primary \"Get Started\" button to complete the onboarding process.\n```\n\n```mdma\ntype: tasklist\nid: onboarding-checklist\nitems:\n  - id: account-setup\n    text: \"Set up your profile\"\n  - id: identity-verification\n    text: \"Verify your identity\"\n  - id: bank-linkage\n    text: \"Link a bank account\"\n  - id: training-module\n    text: \"Complete initial training\"\n```\n\n```mdma\ntype: button\nid: get-started-btn\ntext: \"Get Started\"\nvariant: primary\nonAction: onboarding-done\n```",
+          "tokenUsage": {
+            "total": 1652,
+            "prompt": 1471,
+            "completion": 181,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1711,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1299",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:34 GMT",
+                "modal-function-call-id": "fc-01KWC8HTMSP8PMF1AZ2Z7ZQ5VT",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Onboarding tasklist + button (DSL)",
+          "vars": {
+            "request": "tasklist#onboarding-checklist\nbutton#get-started-btn(text=\"Get Started\", action=onboarding-done, variant=primary)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "tasklist",
+                  "button"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 19,
+        "vars": {
+          "request": "tasklist#onboarding-checklist\nbutton#get-started-btn(text=\"Get Started\", action=onboarding-done, variant=primary)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1299",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:34 GMT",
+              "modal-function-call-id": "fc-01KWC8HTMSP8PMF1AZ2Z7ZQ5VT",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form, callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "db57c1ea-b1f9-42d8-b43c-251544a14f1d",
+        "latencyMs": 1380,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#settings-form[notifications-enabled:c](action=notification-info)\\ncallout#notification-info(variant=info)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Create a settings form with a single checkbox for notifications. This checkbox triggers an action that is subsequently intercepted by an informational callout.\n```\n\n```mdma\ntype: form\nid: settings-form\nfields:\n  - name: notifications-enabled\n    type: checkbox\n    label: \"Enable Notifications\"\nonSubmit: notification-info\n```\n\n```mdma\ntype: callout\nid: notification-info\nvariant: info\ncontent: \"Notification settings have been updated. Your preferences are now being applied to your account immediately.\"\n```",
+          "tokenUsage": {
+            "total": 1612,
+            "prompt": 1469,
+            "completion": 143,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1380,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1247",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:35 GMT",
+                "modal-function-call-id": "fc-01KWC8HWBF0DZ07SJRR3NT2J2T",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Settings form + conditional callout (DSL)",
+          "vars": {
+            "request": "form#settings-form[notifications-enabled:c](action=notification-info)\ncallout#notification-info(variant=info)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 20,
+        "vars": {
+          "request": "form#settings-form[notifications-enabled:c](action=notification-info)\ncallout#notification-info(variant=info)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1247",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:35 GMT",
+              "modal-function-call-id": "fc-01KWC8HWBF0DZ07SJRR3NT2J2T",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "2 unique kebab-case IDs",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/unique-kebab-ids.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "551b03c1-e810-4996-85d9-902cfd284c49",
+        "latencyMs": 1967,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Personal info form: captures name, sensitive email, sensitive phone, sensitive dob, and highly sensitive SSN; submits via info-submitted.\n```\n\n```mdma\ntype: form\nid: personal-info-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: info-submitted\n```",
+          "tokenUsage": {
+            "total": 1701,
+            "prompt": 1482,
+            "completion": 219,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1967,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1409",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:37 GMT",
+                "modal-function-call-id": "fc-01KWC8HXR4F6QBQKB2TE1N5S82",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "HR personal info form (DSL)",
+          "vars": {
+            "request": "form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/unique-kebab-ids.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 21,
+        "vars": {
+          "request": "form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1409",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:37 GMT",
+              "modal-function-call-id": "fc-01KWC8HXR4F6QBQKB2TE1N5S82",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: approval-gate",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "approval-gate"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "fc861626-b26e-4c86-a1d7-3551664cf7aa",
+        "latencyMs": 848,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"approval-gate#budget-approval\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Budget approval gate: a high-level decision step to ensure proper spend authorization.\n```\n\n```mdma\ntype: approval-gate\nid: budget-approval\ntitle: \"Approve Budget Allocation\"\n```",
+          "tokenUsage": {
+            "total": 1517,
+            "prompt": 1446,
+            "completion": 71,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 848,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "902",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:39 GMT",
+                "modal-function-call-id": "fc-01KWC8HZQ6N4G993S0XMTZGX22",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Budget approval gate (DSL)",
+          "vars": {
+            "request": "approval-gate#budget-approval\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "approval-gate"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 22,
+        "vars": {
+          "request": "approval-gate#budget-approval\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "902",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:39 GMT",
+              "modal-function-call-id": "fc-01KWC8HZQ6N4G993S0XMTZGX22",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 1 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 7 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 7
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 sensitive flags",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pii-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "d0fde989-6407-43e9-8425-77fb698c7ad1",
+        "latencyMs": 2452,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Patient intake form: captures PII (Name, DOB, Email, Phone, MRN) and clinical details (Medications, Allergies); submits via patient-intake-submitted.\n```\n\n```mdma\ntype: form\nid: patient-intake\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: medical-record\n    type: text\n    label: \"Medical Record Number (MRN)\"\n    required: true\n    sensitive: true\n  - name: medications\n    type: textarea\n    label: \"Current Medications\"\n  - name: allergies\n    type: textarea\n    label: \"Known Allergies\"\nonSubmit: patient-intake-submitted\n```",
+          "tokenUsage": {
+            "total": 1769,
+            "prompt": 1495,
+            "completion": 274,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2452,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1595",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:40 GMT",
+                "modal-function-call-id": "fc-01KWC8J0KHB9XVA63X3XB10K99",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Patient intake form, medical PII (DSL)",
+          "vars": {
+            "request": "form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 7
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pii-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 23,
+        "vars": {
+          "request": "form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1595",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:40 GMT",
+              "modal-function-call-id": "fc-01KWC8J0KHB9XVA63X3XB10K99",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: chart, chart, table",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "chart",
+                    "table"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Bar chart variant found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/bar-chart.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Pie chart variant found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pie-chart.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "type: table"
+              }
+            }
+          ]
+        },
+        "id": "31534608-3ba9-462e-8b14-2e3258ce8dec",
+        "latencyMs": 4171,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"chart#monthly-revenue(variant=bar)\\nchart#category-breakdown(variant=pie)\\ntable#region-summary\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  The user requested two charts (bar and pie) and a table. Per the \"AT MOST ONE interactive component\" rule, I will provide the non-interactive components in a single response as they are stateless/view-only. All three components are non-interactive (form, button, tasklist, approval-gate, and webhook are interactive).\n```\n\n```mdma\ntype: chart\nid: monthly-revenue\nvariant: bar\nlabel: \"Monthly Gross Revenue\"\ndata: |\n  Month,Revenue\n  Jan,45000\n  Feb,52000\n  Mar,48000\n  Apr,61000\n  May,55000\n  Jun,67000\nxAxis: Month\n```\n\n```mdma\ntype: chart\nid: category-breakdown\nvariant: pie\nlabel: \"Revenue by Product Category\"\ndata: |\n  Category,Value\n  Electronics,45000\n  Home & Garden,32000\n  Apparel,28000\n  Toys,15000\n```\n\n```mdma\ntype: table\nid: region-summary\nsortable: true\nfilterable: true\ncolumns:\n  - key: region\n    header: \"Region\"\n    sortable: true\n  - key: growth\n    header: \"YoY Growth\"\n    sortable: true\n  - key: revenue\n    header: \"Revenue ($)\"\n    sortable: true\n  - key: health\n    header: \"Market Health\"\ndata:\n  - { region: \"North America\", growth: \"+12%\", revenue: 1250000, health: \"Strong\" }\n  - { region: \"EMEA\", growth: \"-4%\", revenue: 980000, health: \"Stable\" }\n  - { region: \"APAC\", growth: \"+22%\", revenue: 1450000, health: \"High Growth\" }\n  - { region: \"LATAM\", growth: \"+2%\", revenue: 450000, health: \"Emerging\" }\n```",
+          "tokenUsage": {
+            "total": 1972,
+            "prompt": 1467,
+            "completion": 505,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4171,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2147",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:42 GMT",
+                "modal-function-call-id": "fc-01KWC8J2ZCFVQRZVRT26H6JJFY",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Sales dashboard — 2 charts + table (DSL)",
+          "vars": {
+            "request": "chart#monthly-revenue(variant=bar)\nchart#category-breakdown(variant=pie)\ntable#region-summary\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "chart",
+                  "table"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/bar-chart.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pie-chart.mjs"
+            },
+            {
+              "type": "contains",
+              "value": "type: table"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 24,
+        "vars": {
+          "request": "chart#monthly-revenue(variant=bar)\nchart#category-breakdown(variant=pie)\ntable#region-summary\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2147",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:42 GMT",
+              "modal-function-call-id": "fc-01KWC8J2ZCFVQRZVRT26H6JJFY",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 2 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 2
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Form contains a file field",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/file-field.mjs"
+              }
+            }
+          ]
+        },
+        "id": "ee1d9172-9e22-4af2-b868-31a60a3f3d18",
+        "latencyMs": 1221,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#resume-form[full-name*:t, resume*:f](action=resume-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Resume upload form: requires a full name and a resume file; submits via resume-submitted.\n```\n\n```mdma\ntype: form\nid: resume-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: resume\n    type: file\n    label: \"Resume\"\n    required: true\nonSubmit: resume-submitted\n```",
+          "tokenUsage": {
+            "total": 1583,
+            "prompt": 1461,
+            "completion": 122,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1221,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1052",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:46 GMT",
+                "modal-function-call-id": "fc-01KWC8J73M51PGW3K3GFPVENRW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Resume form with file field (DSL)",
+          "vars": {
+            "request": "form#resume-form[full-name*:t, resume*:f](action=resume-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 2
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/file-field.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 25,
+        "vars": {
+          "request": "form#resume-form[full-name*:t, resume*:f](action=resume-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1052",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:46 GMT",
+              "modal-function-call-id": "fc-01KWC8J73M51PGW3K3GFPVENRW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Form contains a file field; sensitive: true",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/file-field.mjs",
+                "config": {
+                  "sensitive": true
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "4312d5cb-239d-4bc3-b4fe-7bf8a9e7a9bc",
+        "latencyMs": 1399,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  KYC form: requires a full name (sensitive PII) and a passport scan file upload; submits via kyc-submitted.\n```\n\n```mdma\ntype: form\nid: kyc-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n    sensitive: true\n  - name: passport-scan\n    type: file\n    label: \"Passport Scan\"\n    required: true\n    sensitive: true\nonSubmit: kyc-submitted\n```",
+          "tokenUsage": {
+            "total": 1614,
+            "prompt": 1469,
+            "completion": 145,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1399,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1119",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:47 GMT",
+                "modal-function-call-id": "fc-01KWC8J8A518JC25E679M4KWKH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "KYC form, sensitive file (DSL)",
+          "vars": {
+            "request": "form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/file-field.mjs",
+              "config": {
+                "sensitive": true
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 26,
+        "vars": {
+          "request": "form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1119",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:47 GMT",
+              "modal-function-call-id": "fc-01KWC8J8A518JC25E679M4KWKH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "id: devcon-2026-registration"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "36c5c4c2-6d62-4534-b5c7-501aafb14a84",
+        "latencyMs": 1431,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\"},{\"role\":\"user\",\"content\":\"form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)\\n\"}]",
+          "label": "own-model/prompt-author.mjs",
+          "config": {}
+        },
+        "promptId": "a215030c5bb1ab12c4729b3195affd78a5fff95e4c88201c6183e640304b1358",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Registration form for DevCon 2026: requires a full name and a sensitive email; submits via devcon-registration-submitted.\n```\n\n```mdma\ntype: form\nid: devcon-2026-registration\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\nonSubmit: devcon-registration-submitted\n```",
+          "tokenUsage": {
+            "total": 1619,
+            "prompt": 1472,
+            "completion": 147,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1431,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1138",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:49 GMT",
+                "modal-function-call-id": "fc-01KWC8J9PKBFX1T1H2YD03NSS0",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Conference registration, preserve id (DSL)",
+          "vars": {
+            "request": "form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "contains",
+              "value": "id: devcon-2026-registration"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 27,
+        "vars": {
+          "request": "form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1138",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:49 GMT",
+              "modal-function-call-id": "fc-01KWC8J9PKBFX1T1H2YD03NSS0",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      }
+    ],
+    "stats": {
+      "successes": 28,
+      "failures": 0,
+      "errors": 0,
+      "tokenUsage": {
+        "prompt": 40987,
+        "completion": 4609,
+        "cached": 0,
+        "total": 45596,
+        "numRequests": 28,
+        "completionDetails": {
+          "reasoning": 0,
+          "acceptedPrediction": 0,
+          "rejectedPrediction": 0,
+          "cacheReadInputTokens": 0,
+          "cacheCreationInputTokens": 0
+        },
+        "assertions": {
+          "total": 0,
+          "prompt": 0,
+          "completion": 0,
+          "cached": 0,
+          "numRequests": 0,
+          "completionDetails": {
+            "reasoning": 0,
+            "acceptedPrediction": 0,
+            "rejectedPrediction": 0,
+            "cacheReadInputTokens": 0,
+            "cacheCreationInputTokens": 0
+          }
+        }
+      },
+      "durationMs": 48856,
+      "evaluationDurationMs": 48856
+    }
+  },
+  "config": {
+    "tags": {},
+    "description": "MDMA Author Eval (DSL) — own model",
+    "prompts": [
+      "file:///Users/marcinsadowski/GIT/mr-mdma/evals/own-model/prompt-author.mjs"
+    ],
+    "providers": [
+      {
+        "id": "openai:chat:mdma-26b",
+        "config": {
+          "apiBaseUrl": "https://REDACTED.modal.run/v1",
+          "apiKey": "[REDACTED]",
+          "temperature": 1,
+          "max_tokens": 2048,
+          "min_p": 0.02,
+          "repetition_penalty": 1.1,
+          "chat_template_kwargs": {
+            "enable_thinking": false
+          }
+        }
+      }
+    ],
+    "tests": [
+      {
+        "description": "Contact form (DSL)",
+        "vars": {
+          "request": "form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 3
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Employee PII form, sensitive fields (DSL)",
+        "vars": {
+          "request": "form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 4
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pii-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Maintenance callout (DSL)",
+        "vars": {
+          "request": "callout#maintenance-warning(variant=warning)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "callout"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Submit report button (DSL)",
+        "vars": {
+          "request": "button#submit-report-btn(text=\"Submit Report\", action=submit-report, variant=primary)\n"
+        },
+        "assert": [
+          {
+            "type": "not-contains",
+            "value": "```markdown"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "button"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Country selector form (DSL)",
+        "vars": {
+          "request": "form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Languages table (DSL)",
+        "vars": {
+          "request": "table#languages-table\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "table"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/table-features.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Quarterly sales bar chart (DSL)",
+        "vars": {
+          "request": "chart#quarterly-sales(variant=bar)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "chart"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/bar-chart.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Document approval gate (DSL)",
+        "vars": {
+          "request": "approval-gate#document-approval\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "approval-gate"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Incident triage severity form (DSL)",
+        "vars": {
+          "request": "form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 3
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Settings form + saved callout (DSL)",
+        "vars": {
+          "request": "form#settings-form[display-name*:t, email*^:e](action=settings-saved)\ncallout#settings-saved(variant=success)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/unique-kebab-ids.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/component-count.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Password-expiry warning callout (DSL)",
+        "vars": {
+          "request": "callout#password-expiry(variant=warning)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/callout-variant.mjs",
+            "config": {
+              "variant": "warning"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Payment-error callout (DSL)",
+        "vars": {
+          "request": "callout#payment-error(variant=error)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/callout-variant.mjs",
+            "config": {
+              "variant": "error"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Account-verified callout (DSL)",
+        "vars": {
+          "request": "callout#account-verified(variant=success)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/callout-variant.mjs",
+            "config": {
+              "variant": "success"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Browser-share pie chart (DSL)",
+        "vars": {
+          "request": "chart#browser-share(variant=pie)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "chart"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pie-chart.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Delete-account danger button (DSL)",
+        "vars": {
+          "request": "button#delete-account-btn(text=\"Delete Account\", action=delete-account, variant=danger)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "button"
+              ]
+            }
+          },
+          {
+            "type": "contains",
+            "value": "variant: danger"
+          }
+        ]
+      },
+      {
+        "description": "Ticket form + submitted callout (DSL)",
+        "vars": {
+          "request": "form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\ncallout#ticket-submitted(variant=success)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/component-count.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Employee directory table (DSL)",
+        "vars": {
+          "request": "table#employee-directory\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "table"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/table-features.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Payment form, financial PII (DSL)",
+        "vars": {
+          "request": "form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 5
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pii-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "User registration form (DSL)",
+        "vars": {
+          "request": "form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 4
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Onboarding tasklist + button (DSL)",
+        "vars": {
+          "request": "tasklist#onboarding-checklist\nbutton#get-started-btn(text=\"Get Started\", action=onboarding-done, variant=primary)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "tasklist",
+                "button"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Settings form + conditional callout (DSL)",
+        "vars": {
+          "request": "form#settings-form[notifications-enabled:c](action=notification-info)\ncallout#notification-info(variant=info)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "HR personal info form (DSL)",
+        "vars": {
+          "request": "form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/unique-kebab-ids.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Budget approval gate (DSL)",
+        "vars": {
+          "request": "approval-gate#budget-approval\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "approval-gate"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Patient intake form, medical PII (DSL)",
+        "vars": {
+          "request": "form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 7
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pii-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Sales dashboard — 2 charts + table (DSL)",
+        "vars": {
+          "request": "chart#monthly-revenue(variant=bar)\nchart#category-breakdown(variant=pie)\ntable#region-summary\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "chart",
+                "table"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/bar-chart.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pie-chart.mjs"
+          },
+          {
+            "type": "contains",
+            "value": "type: table"
+          }
+        ]
+      },
+      {
+        "description": "Resume form with file field (DSL)",
+        "vars": {
+          "request": "form#resume-form[full-name*:t, resume*:f](action=resume-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 2
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/file-field.mjs"
+          }
+        ]
+      },
+      {
+        "description": "KYC form, sensitive file (DSL)",
+        "vars": {
+          "request": "form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/file-field.mjs",
+            "config": {
+              "sensitive": true
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Conference registration, preserve id (DSL)",
+        "vars": {
+          "request": "form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "contains",
+            "value": "id: devcon-2026-registration"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      }
+    ],
+    "scenarios": [],
+    "env": {},
+    "defaultTest": {
+      "assert": [
+        {
+          "type": "javascript",
+          "value": "file://assertions/validate-mdma.mjs",
+          "config": {
+            "exclude": [
+              "flow-ordering"
+            ]
+          }
+        }
+      ],
+      "vars": {},
+      "options": {},
+      "metadata": {}
+    },
+    "outputPath": [
+      "own-model/results-author.json"
+    ],
+    "extensions": [],
+    "metadata": {},
+    "evaluateOptions": {}
+  },
+  "shareableUrl": null,
+  "metadata": {
+    "promptfooVersion": "0.121.9",
+    "nodeVersion": "v22.22.0",
+    "platform": "darwin",
+    "arch": "arm64",
+    "exportedAt": "2026-06-30T12:37:51.375Z",
+    "evaluationCreatedAt": "2026-06-30T12:37:01.882Z"
+  }
+}
\ No newline at end of file
diff --git a/evals/own-model/results-custom.json b/evals/own-model/results-custom.json
new file mode 100644
index 0000000..875c8de
--- /dev/null
+++ b/evals/own-model/results-custom.json
@@ -0,0 +1,3109 @@
+{
+  "evalId": "eval-Ur5-2026-06-30T12:35:16",
+  "results": {
+    "version": 3,
+    "timestamp": "2026-06-30T12:35:16.976Z",
+    "prompts": [
+      {
+        "raw": "function ({ vars }) {\n  const system = vars.customPrompt\n    ? `${AUTHORING_SYSTEM_PROMPT}\\n\\n---\\n\\n${vars.customPrompt}`\n    : AUTHORING_SYSTEM_PROMPT;\n  return [\n    { role: 'system', content: `{% raw %}${system}{% endraw %}` },\n    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` },\n  ];\n}",
+        "label": "own-model/prompt-custom.mjs",
+        "config": {},
+        "id": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "provider": "openai:chat:mdma-26b",
+        "metrics": {
+          "score": 13,
+          "testPassCount": 13,
+          "testFailCount": 0,
+          "testErrorCount": 0,
+          "assertPassCount": 69,
+          "assertFailCount": 0,
+          "totalLatencyMs": 33107,
+          "tokenUsage": {
+            "prompt": 20012,
+            "completion": 3390,
+            "cached": 0,
+            "total": 23402,
+            "numRequests": 13,
+            "completionDetails": {
+              "reasoning": 0,
+              "acceptedPrediction": 0,
+              "rejectedPrediction": 0,
+              "cacheReadInputTokens": 0,
+              "cacheCreationInputTokens": 0
+            },
+            "assertions": {
+              "total": 0,
+              "prompt": 0,
+              "completion": 0,
+              "cached": 0,
+              "numRequests": 0,
+              "completionDetails": {
+                "reasoning": 0,
+                "acceptedPrediction": 0,
+                "rejectedPrediction": 0,
+                "cacheReadInputTokens": 0,
+                "cacheCreationInputTokens": 0
+              }
+            }
+          },
+          "namedScores": {},
+          "namedScoresCount": {},
+          "namedScoreWeights": {},
+          "cost": 0
+        }
+      }
+    ],
+    "results": [
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 5 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 5
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "type: textarea"
+              }
+            }
+          ]
+        },
+        "id": "2764a6d0-e7d8-4569-a3ab-fefd2e4b0d80",
+        "latencyMs": 3274,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a bug tracking assistant. 
When a user reports a bug, generate the\\nMDMA document for exactly this DSL intent and nothing else:\\n\\nform#bug-report[title*:t, severity:s{critical|high|medium|low}, steps*:ta, expected:ta, actual:ta](action=bug-submitted)\\n\"},{\"role\":\"user\",\"content\":\"I found a bug where the login page crashes after entering my password.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Bug Report form: collects a required title, a severity select (critical to low), and required textareas for steps, expected result, and actual result; submits via bug-submitted.\n```\n\n```mdma\ntype: form\nid: bug-report\nfields:\n  - name: title\n    type: text\n    label: \"Bug Title\"\n    required: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    options:\n      - { label: \"Critical\", value: \"critical\" }\n      - { label: \"High\", value: \"high\" }\n      - { label: \"Medium\", value: \"medium\" }\n      - { label: \"Low\", value: \"low\" }\n  - name: steps\n    type: textarea\n    label: \"Steps to Reproduce\"\n    required: true\n  - name: expected\n    type: textarea\n    label: \"Expected Behavior\"\n    required: true\n  - name: actual\n    type: textarea\n    label: \"Actual Behavior\"\n    required: true\nonSubmit: bug-submitted\n```",
+          "tokenUsage": {
+            "total": 1806,
+            "prompt": 1535,
+            "completion": 271,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3274,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1602",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:16 GMT",
+                "modal-function-call-id": "fc-01KWC8DNB58Q32NDB3ZRH4AG7M",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Bug report form (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a bug tracking assistant. When a user reports a bug, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#bug-report[title*:t, severity:s{critical|high|medium|low}, steps*:ta, expected:ta, actual:ta](action=bug-submitted)\n",
+            "request": "I found a bug where the login page crashes after entering my password."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 5
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 2
+              }
+            },
+            {
+              "type": "contains",
+              "value": "type: textarea"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 0,
+        "vars": {
+          "customPrompt": "You are a bug tracking assistant. When a user reports a bug, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#bug-report[title*:t, severity:s{critical|high|medium|low}, steps*:ta, expected:ta, actual:ta](action=bug-submitted)\n",
+          "request": "I found a bug where the login page crashes after entering my password."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1602",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:16 GMT",
+              "modal-function-call-id": "fc-01KWC8DNB58Q32NDB3ZRH4AG7M",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 4 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 4
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "f38bf4d4-7e93-42c8-8e4f-fb2ff3441836",
+        "latencyMs": 2254,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an HR onboarding assistant. 
When a new hire needs to be set up,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\nform#new-hire-info[full-name*:t, email*^:e, department:s{engineering|design|marketing|sales|operations}, start-date*:d](action=onboarding-checklist)\\n\"},{\"role\":\"user\",\"content\":\"We have a new hire starting in the Design department next Monday.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Employee onboarding form with required name, sensitive email, department selection, and start date; submits via onboarding-checklist.\n```\n\n```mdma\ntype: form\nid: new-hire-info\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: department\n    type: select\n    label: \"Department\"\n    options:\n      - label: \"Engineering\"\n        value: \"engineering\"\n      - label: \"Design\"\n        value: \"design\"\n      - label: \"Marketing\"\n        value: \"marketing\"\n      - label: \"Sales\"\n        value: \"sales\"\n      - label: \"Operations\"\n        value: \"operations\"\n  - name: start-date\n    type: date\n    label: \"Start Date\"\n    required: true\nonSubmit: onboarding-checklist\n```",
+          "tokenUsage": {
+            "total": 1801,
+            "prompt": 1544,
+            "completion": 257,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2254,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1588",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:19 GMT",
+                "modal-function-call-id": "fc-01KWC8DRANQNXWPAB9RXQYQ3BH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Onboarding form (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are an HR onboarding assistant. When a new hire needs to be set up,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#new-hire-info[full-name*:t, email*^:e, department:s{engineering|design|marketing|sales|operations}, start-date*:d](action=onboarding-checklist)\n",
+            "request": "We have a new hire starting in the Design department next Monday."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 4
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 1,
+        "vars": {
+          "customPrompt": "You are an HR onboarding assistant. When a new hire needs to be set up,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#new-hire-info[full-name*:t, email*^:e, department:s{engineering|design|marketing|sales|operations}, start-date*:d](action=onboarding-checklist)\n",
+          "request": "We have a new hire starting in the Design department next Monday."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1588",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:19 GMT",
+              "modal-function-call-id": "fc-01KWC8DRANQNXWPAB9RXQYQ3BH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 1 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form, chart",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "chart"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 4 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 4
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Pie chart variant found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pie-chart.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "da71cde5-9cef-45b8-974d-d15a0045f7b2",
+        "latencyMs": 3034,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a customer success assistant. 
When asked about feedback, generate\\nthe MDMA document for exactly this DSL intent and nothing else:\\n\\nform#feedback-form[customer-name:t, email^:e, rating:s{1|2|3|4|5}, feedback:ta](action=feedback-submitted)\\nchart#satisfaction-chart(variant=pie)\\n\"},{\"role\":\"user\",\"content\":\"I need to collect customer feedback for this quarter.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Customer feedback collection: a form for name, sensitive email, 1-5 rating, and textarea feedback; submits via feedback-submitted; alongside a pie chart for satisfaction trends.\n```\n\n```mdma\ntype: form\nid: feedback-form\nfields:\n  - name: customer-name\n    type: text\n    label: \"Customer Name\"\n  - name: email\n    type: email\n    label: \"Email Address\"\n    sensitive: true\n  - name: rating\n    type: select\n    label: \"Rating\"\n    options:\n      - { label: \"1\", value: \"1\" }\n      - { label: \"2\", value: \"2\" }\n      - { label: \"3\", value: \"3\" }\n      - { label: \"4\", value: \"4\" }\n      - { label: \"5\", value: \"5\" }\n  - name: feedback\n    type: textarea\n    label: \"Your Feedback\"\nonSubmit: feedback-submitted\n```\n\n```mdma\ntype: chart\nid: satisfaction-chart\nvariant: pie\nlabel: \"Customer Satisfaction Overview\"\ndata: |\n  Rating, Count\n  5, 45\n  4, 30\n  3, 15\n  2, 5\n  1, 5\n```",
+          "tokenUsage": {
+            "total": 1863,
+            "prompt": 1542,
+            "completion": 321,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3034,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1662",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:22 GMT",
+                "modal-function-call-id": "fc-01KWC8DTJKAW5A1GRNFHAGBZV4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Feedback form + pie chart (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a customer success assistant. When asked about feedback, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#feedback-form[customer-name:t, email^:e, rating:s{1|2|3|4|5}, feedback:ta](action=feedback-submitted)\nchart#satisfaction-chart(variant=pie)\n",
+            "request": "I need to collect customer feedback for this quarter."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "chart"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 4
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pie-chart.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 2,
+        "vars": {
+          "customPrompt": "You are a customer success assistant. When asked about feedback, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#feedback-form[customer-name:t, email^:e, rating:s{1|2|3|4|5}, feedback:ta](action=feedback-submitted)\nchart#satisfaction-chart(variant=pie)\n",
+          "request": "I need to collect customer feedback for this quarter."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1662",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:22 GMT",
+              "modal-function-call-id": "fc-01KWC8DTJKAW5A1GRNFHAGBZV4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 5 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 5
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            }
+          ]
+        },
+        "id": "6ff2fb4d-f317-4f04-b919-412ed2f0dc5a",
+        "latencyMs": 2597,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a finance assistant. 
When an expense needs logging, generate the\\nMDMA document for exactly this DSL intent and nothing else:\\n\\nform#expense-form[amount*:n, category:s{travel|equipment|software|meals|other}, date*:d, receipt-desc:ta, receipt-number:t](action=approve-expense)\\n\"},{\"role\":\"user\",\"content\":\"I need to expense a $250 flight for the NYC conference.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Expense logging form: includes required amount, category selection, required date, optional description, and optional receipt number; submits via approve-expense.\n```\n\n```mdma\ntype: form\nid: expense-form\nfields:\n  - name: amount\n    type: number\n    label: \"Amount\"\n    required: true\n  - name: category\n    type: select\n    label: \"Category\"\n    options:\n      - label: \"Travel\"\n        value: \"travel\"\n      - label: \"Equipment\"\n        value: \"equipment\"\n      - label: \"Software\"\n        value: \"software\"\n      - label: \"Meals\"\n        value: \"meals\"\n      - label: \"Other\"\n        value: \"other\"\n  - name: date\n    type: date\n    label: \"Date\"\n    required: true\n  - name: receipt-desc\n    type: textarea\n    label: \"Receipt Description\"\n  - name: receipt-number\n    type: text\n    label: \"Receipt Number\"\nonSubmit: approve-expense\n```",
+          "tokenUsage": {
+            "total": 1807,
+            "prompt": 1540,
+            "completion": 267,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2597,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1623",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:25 GMT",
+                "modal-function-call-id": "fc-01KWC8DXJ9BDMXJ6WEEW4Y3WDH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Expense form (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a finance assistant. When an expense needs logging, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#expense-form[amount*:n, category:s{travel|equipment|software|meals|other}, date*:d, receipt-desc:ta, receipt-number:t](action=approve-expense)\n",
+            "request": "I need to expense a $250 flight for the NYC conference."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 5
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 3,
+        "vars": {
+          "customPrompt": "You are a finance assistant. When an expense needs logging, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#expense-form[amount*:n, category:s{travel|equipment|software|meals|other}, date*:d, receipt-desc:ta, receipt-number:t](action=approve-expense)\n",
+          "request": "I need to expense a $250 flight for the NYC conference."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1623",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:25 GMT",
+              "modal-function-call-id": "fc-01KWC8DXJ9BDMXJ6WEEW4Y3WDH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 4 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 4
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 2 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "2b6cea5b-84ce-48ab-95db-c80db6178bba",
+        "latencyMs": 3120,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an IT helpdesk assistant. 
When a user reports an issue, generate\\nthe MDMA document for exactly this DSL intent and nothing else:\\n\\nform#it-ticket[requester-email*^:e, issue-type:s{hardware|software|network|access|other}, priority:s{low|medium|high|urgent}, description*:ta](action=submit-ticket)\\n\"},{\"role\":\"user\",\"content\":\"My monitor stopped working this morning and I can't do any visual design work.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Create an IT ticket form for hardware or software issues with fields for the requester's email (sensitive), issue type, priority, and a required description.\n```\n\n```mdma\ntype: form\nid: it-ticket\nfields:\n  - name: requester-email\n    type: email\n    label: \"Requester Email\"\n    required: true\n    sensitive: true\n  - name: issue-type\n    type: select\n    label: \"Issue Type\"\n    options:\n      - label: \"Hardware\"\n        value: \"hardware\"\n      - label: \"Software\"\n        value: \"software\"\n      - label: \"Network\"\n        value: \"network\"\n      - label: \"Access\"\n        value: \"access\"\n      - label: \"Other\"\n        value: \"other\"\n  - name: priority\n    type: select\n    label: \"Priority\"\n    options:\n      - label: \"Low\"\n        value: \"low\"\n      - label: \"Medium\"\n        value: \"medium\"\n      - label: \"High\"\n        value: \"high\"\n      - label: \"Urgent\"\n        value: \"urgent\"\n  - name: description\n    type: textarea\n    label: \"Issue Description\"\n    required: true\nonSubmit: submit-ticket\n```",
+          "tokenUsage": {
+            "total": 1876,
+            "prompt": 1552,
+            "completion": 324,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3120,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1811",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:27 GMT",
+                "modal-function-call-id": "fc-01KWC8E05692C1HH3076WVAXRE",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "IT ticket form (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are an IT helpdesk assistant. When a user reports an issue, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#it-ticket[requester-email*^:e, issue-type:s{hardware|software|network|access|other}, priority:s{low|medium|high|urgent}, description*:ta](action=submit-ticket)\n",
+            "request": "My monitor stopped working this morning and I can't do any visual design work."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 4
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 4,
+        "vars": {
+          "customPrompt": "You are an IT helpdesk assistant. When a user reports an issue, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#it-ticket[requester-email*^:e, issue-type:s{hardware|software|network|access|other}, priority:s{low|medium|high|urgent}, description*:ta](action=submit-ticket)\n",
+          "request": "My monitor stopped working this morning and I can't do any visual design work."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1811",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:27 GMT",
+              "modal-function-call-id": "fc-01KWC8E05692C1HH3076WVAXRE",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: table, chart",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "table",
+                    "chart"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Table with sortable and filterable found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/table-features.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Bar chart variant found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/bar-chart.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: form"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: button"
+              }
+            }
+          ]
+        },
+        "id": "65760cd7-ff64-44bf-8989-173cbcb5e05f",
+        "latencyMs": 3925,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a sales analytics assistant. When asked for a sales report,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\ntable#sales-table\\nchart#sales-chart(variant=bar)\\n\"},{\"role\":\"user\",\"content\":\"Show me the Q4 sales performance breakdown.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: table\nid: sales-table\nsortable: true\nfilterable: true\ncolumns:\n  - key: region\n    header: \"Region\"\n    sortable: true\n  - key: product-category\n    header: \"Product Category\"\n    sortable: true\n  - key: sales-total\n    header: \"Total Sales ($)\"\n    sortable: true\n  - key: growth\n    header: \"YoY Growth\"\ndata:\n  - { region: \"North America\", \"product-category\": \"Software\", \"sales-total\": 850000, growth: \"+12%\" }\n  - { region: \"North America\", \"product-category\": \"Hardware\", \"sales-total\": 420000, growth: \"-3%\" }\n  - { region: \"EMEA\", \"product-category\": \"Software\", \"sales-total\": 610000, growth: \"+18%\" }\n  - { region: \"EMEA\", \"product-category\": \"Hardware\", \"sales-total\": 290000, growth: \"+5%\" }\n  - { region: \"APAC\", \"product-category\": \"Software\", \"sales-total\": 540000, growth: \"+24%\" }\n  - { region: \"APAC\", \"product-category\": \"Hardware\", \"sales-total\": 310000, growth: \"+11%\" }\n```\n\n```mdma\ntype: chart\nid: sales-chart\nvariant: bar\nlabel: \"Quarterly Sales by Region\"\ndata: |\n  Region, Sales\n  North America, 1270000\n  EMEA, 900000\n  APAC, 850000\nxAxis: Region\n```",
+          "tokenUsage": {
+            "total": 1920,
+            "prompt": 1507,
+            "completion": 413,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3925,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1830",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:30 GMT",
+                "modal-function-call-id": "fc-01KWC8E36WES5EK6GKEEYFAT16",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Sales table + bar chart (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a sales analytics assistant. When asked for a sales report,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ntable#sales-table\nchart#sales-chart(variant=bar)\n",
+            "request": "Show me the Q4 sales performance breakdown."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "table",
+                  "chart"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/table-features.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/bar-chart.mjs"
+            },
+            {
+              "type": "not-contains",
+              "value": "type: form"
+            },
+            {
+              "type": "not-contains",
+              "value": "type: button"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 5,
+        "vars": {
+          "customPrompt": "You are a sales analytics assistant. When asked for a sales report,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ntable#sales-table\nchart#sales-chart(variant=bar)\n",
+          "request": "Show me the Q4 sales performance breakdown."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1830",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:30 GMT",
+              "modal-function-call-id": "fc-01KWC8E36WES5EK6GKEEYFAT16",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 1 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 6 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 6
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 5 sensitive flags",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pii-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 4
+                }
+              }
+            }
+          ]
+        },
+        "id": "2891772b-cad0-41ee-ab88-89b601c94b41",
+        "latencyMs": 2552,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a medical intake assistant. 
When registering a patient, generate\\nthe MDMA document for exactly this DSL intent and nothing else:\\n\\nform#patient-intake[full-name*^:t, dob*^:d, email^:e, phone^:t, insurance-id*^:t, chief-complaint*:ta](action=patient-registered)\\n\"},{\"role\":\"user\",\"content\":\"New patient walk-in needs to be registered.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Medical intake form: captures required and sensitive PII (name, DOB, email, phone, insurance-id) and a required chief complaint; submits via patient-registered.\n```\n\n```mdma\ntype: form\nid: patient-intake\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n    sensitive: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: insurance-id\n    type: text\n    label: \"Insurance ID\"\n    required: true\n    sensitive: true\n  - name: chief-complaint\n    type: textarea\n    label: \"Chief Complaint\"\n    required: true\nonSubmit: patient-registered\n```",
+          "tokenUsage": {
+            "total": 1798,
+            "prompt": 1541,
+            "completion": 257,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2552,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1551",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:34 GMT",
+                "modal-function-call-id": "fc-01KWC8E72RDRE154424J9GHHR6",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Patient intake form, PII marking (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a medical intake assistant. When registering a patient, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#patient-intake[full-name*^:t, dob*^:d, email^:e, phone^:t, insurance-id*^:t, chief-complaint*:ta](action=patient-registered)\n",
+            "request": "New patient walk-in needs to be registered."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 6
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pii-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 4
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 6,
+        "vars": {
+          "customPrompt": "You are a medical intake assistant. When registering a patient, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#patient-intake[full-name*^:t, dob*^:d, email^:e, phone^:t, insurance-id*^:t, chief-complaint*:ta](action=patient-registered)\n",
+          "request": "New patient walk-in needs to be registered."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1551",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:34 GMT",
+              "modal-function-call-id": "fc-01KWC8E72RDRE154424J9GHHR6",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Callout with variant: warning found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/callout-variant.mjs",
+                "config": {
+                  "variant": "warning"
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: form"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: button"
+              }
+            }
+          ]
+        },
+        "id": "973292ae-dc30-4f87-a3fe-1d5f98a0f7ba",
+        "latencyMs": 1190,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a system status communicator. 
On a maintenance event, generate\\nthe MDMA document for exactly this DSL intent and nothing else (fill the\\ncallout content from the user's message):\\n\\ncallout#maintenance-notice(variant=warning)\\n\"},{\"role\":\"user\",\"content\":\"We need to take the payment system offline on Saturday March 8th from 2 AM to 6 AM EST for database migration.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Maintenance notice: A single warning callout describing the scheduled payment system downtime.\n```\n\n```mdma\ntype: callout\nid: maintenance-notice\nvariant: warning\ncontent: |\n  The payment system will be offline for scheduled maintenance on Saturday, March 8th, from 2:00 AM to 6:00 AM EST to facilitate a necessary database migration. We apologize for the inconvenience.\n```",
+          "tokenUsage": {
+            "total": 1646,
+            "prompt": 1531,
+            "completion": 115,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1190,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1098",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:37 GMT",
+                "modal-function-call-id": "fc-01KWC8E9M391E3TGM1AP3XT76R",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Maintenance callout (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a system status communicator. On a maintenance event, generate\nthe MDMA document for exactly this DSL intent and nothing else (fill the\ncallout content from the user's message):\n\ncallout#maintenance-notice(variant=warning)\n",
+            "request": "We need to take the payment system offline on Saturday March 8th from 2 AM to 6 AM EST for database migration."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/callout-variant.mjs",
+              "config": {
+                "variant": "warning"
+              }
+            },
+            {
+              "type": "not-contains",
+              "value": "type: form"
+            },
+            {
+              "type": "not-contains",
+              "value": "type: button"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 7,
+        "vars": {
+          "customPrompt": "You are a system status communicator. On a maintenance event, generate\nthe MDMA document for exactly this DSL intent and nothing else (fill the\ncallout content from the user's message):\n\ncallout#maintenance-notice(variant=warning)\n",
+          "request": "We need to take the payment system offline on Saturday March 8th from 2 AM to 6 AM EST for database migration."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1098",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:37 GMT",
+              "modal-function-call-id": "fc-01KWC8E9M391E3TGM1AP3XT76R",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 5 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 5
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 4
+                }
+              }
+            }
+          ]
+        },
+        "id": "fcff0533-b867-4c7b-a534-f65f32af74f5",
+        "latencyMs": 2641,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a legal operations assistant. 
When a contract needs review,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\nform#contract-summary[contract-title*:t, counterparty*:t, contract-value*:n, effective-date*:d, contract-type:s{nda|msa|sow|amendment|renewal}](action=review-checklist)\\n\"},{\"role\":\"user\",\"content\":\"We need to review the new SoW from Acme Corp worth $500k.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Contract review form: capturing title, counterparty, value, date, and type (NDA/MSA/SOW/Amendment/Renewal) to trigger the review-checklist action.\n```\n\n```mdma\ntype: form\nid: contract-summary\nfields:\n  - name: contract-title\n    type: text\n    label: \"Contract Title\"\n    required: true\n  - name: counterparty\n    type: text\n    label: \"Counterparty\"\n    required: true\n  - name: contract-value\n    type: number\n    label: \"Contract Value ($)\"\n    required: true\n  - name: effective-date\n    type: date\n    label: \"Effective Date\"\n    required: true\n  - name: contract-type\n    type: select\n    label: \"Contract Type\"\n    options:\n      - { label: \"NDA\", value: \"nda\" }\n      - { label: \"MSA\", value: \"msa\" }\n      - { label: \"SOW\", value: \"sow\" }\n      - { label: \"Amendment\", value: \"amendment\" }\n      - { label: \"Renewal\", value: \"renewal\" }\nonSubmit: review-checklist\n```",
+          "tokenUsage": {
+            "total": 1851,
+            "prompt": 1553,
+            "completion": 298,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2641,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1655",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:38 GMT",
+                "modal-function-call-id": "fc-01KWC8EAT499RE6CB2Q067Q6JT",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Contract summary form (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a legal operations assistant. When a contract needs review,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#contract-summary[contract-title*:t, counterparty*:t, contract-value*:n, effective-date*:d, contract-type:s{nda|msa|sow|amendment|renewal}](action=review-checklist)\n",
+            "request": "We need to review the new SoW from Acme Corp worth $500k."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 5
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 4
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 8,
+        "vars": {
+          "customPrompt": "You are a legal operations assistant. When a contract needs review,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#contract-summary[contract-title*:t, counterparty*:t, contract-value*:n, effective-date*:d, contract-type:s{nda|msa|sow|amendment|renewal}](action=review-checklist)\n",
+          "request": "We need to review the new SoW from Acme Corp worth $500k."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1655",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:38 GMT",
+              "modal-function-call-id": "fc-01KWC8EAT499RE6CB2Q067Q6JT",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 5 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 5
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Form contains a file field",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/file-field.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "3094e7e9-a5d8-4f06-ae97-7d5aa8954cb5",
+        "latencyMs": 2456,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a recruiting assistant. 
When a candidate applies, generate the\\nMDMA document for exactly this DSL intent and nothing else:\\n\\nform#job-application[full-name*:t, email*^:e, resume*:f, portfolio:f, cover-letter:ta](action=application-submitted)\\n\"},{\"role\":\"user\",\"content\":\"I'd like to apply for the senior product designer role you posted.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Applying to a job role. The user requires a job application form with a required full name (PII), a required and sensitive email (PII), a required file upload for a resume, an optional file upload for a portfolio, and an optional textarea for a cover letter. This form will submit via the action 'application-submitted'.\n```\n\n```mdma\ntype: form\nid: job-application\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: resume\n    type: file\n    label: \"Resume\"\n    required: true\n  - name: portfolio\n    type: file\n    label: \"Portfolio Link/File\"\n  - name: cover-letter\n    type: textarea\n    label: \"Cover Letter\"\nonSubmit: application-submitted\n```",
+          "tokenUsage": {
+            "total": 1778,
+            "prompt": 1531,
+            "completion": 247,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2456,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1564",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:41 GMT",
+                "modal-function-call-id": "fc-01KWC8EDDM25G7KPG0WF064JCG",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Job application with file fields (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a recruiting assistant. When a candidate applies, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#job-application[full-name*:t, email*^:e, resume*:f, portfolio:f, cover-letter:ta](action=application-submitted)\n",
+            "request": "I'd like to apply for the senior product designer role you posted."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 5
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/file-field.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 9,
+        "vars": {
+          "customPrompt": "You are a recruiting assistant. When a candidate applies, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#job-application[full-name*:t, email*^:e, resume*:f, portfolio:f, cover-letter:ta](action=application-submitted)\n",
+          "request": "I'd like to apply for the senior product designer role you posted."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1564",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:41 GMT",
+              "modal-function-call-id": "fc-01KWC8EDDM25G7KPG0WF064JCG",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Exactly 4 form fields found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/exact-field-count.mjs",
+                "config": {
+                  "expected": 4
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Form contains a file field; sensitive: true",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/file-field.mjs",
+                "config": {
+                  "sensitive": true
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 sensitive flags",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pii-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 4
+                }
+              }
+            }
+          ]
+        },
+        "id": "767573b4-b8c9-43d2-b51f-72e03398c79d",
+        "latencyMs": 2139,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a KYC compliance assistant. 
When verifying a customer's identity,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\nform#kyc-identity-form[full-legal-name*^:t, date-of-birth*^:d, passport-scan*^:f, proof-of-address*^:f](action=kyc-identity-verified)\\n\"},{\"role\":\"user\",\"content\":\"I need to verify the identity of a new customer applying for an account.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  KYC identity form: collects sensitive legal name, DOB, and file uploads for passport and proof of address; submits via kyc-identity-verified.\n```\n\n```mdma\ntype: form\nid: kyc-identity-form\nfields:\n  - name: full-legal-name\n    type: text\n    label: \"Full Legal Name\"\n    required: true\n    sensitive: true\n  - name: date-of-birth\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: passport-scan\n    type: file\n    label: \"Passport Scan\"\n    required: true\n    sensitive: true\n  - name: proof-of-address\n    type: file\n    label: \"Proof of Address\"\n    required: true\n    sensitive: true\nonSubmit: kyc-identity-verified\n```",
+          "tokenUsage": {
+            "total": 1777,
+            "prompt": 1553,
+            "completion": 224,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2139,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1413",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:43 GMT",
+                "modal-function-call-id": "fc-01KWC8EFVFZMKY8FDS06T54VHB",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "KYC identity form, all fields sensitive (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a KYC compliance assistant. When verifying a customer's identity,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#kyc-identity-form[full-legal-name*^:t, date-of-birth*^:d, passport-scan*^:f, proof-of-address*^:f](action=kyc-identity-verified)\n",
+            "request": "I need to verify the identity of a new customer applying for an account."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/exact-field-count.mjs",
+              "config": {
+                "expected": 4
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/file-field.mjs",
+              "config": {
+                "sensitive": true
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pii-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 4
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 10,
+        "vars": {
+          "customPrompt": "You are a KYC compliance assistant. When verifying a customer's identity,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#kyc-identity-form[full-legal-name*^:t, date-of-birth*^:d, passport-scan*^:f, proof-of-address*^:f](action=kyc-identity-verified)\n",
+          "request": "I need to verify the identity of a new customer applying for an account."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1413",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:43 GMT",
+              "modal-function-call-id": "fc-01KWC8EFVFZMKY8FDS06T54VHB",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: button",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "button"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "variant: danger"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: form"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: table"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "not-contains",
+                "value": "type: callout"
+              }
+            }
+          ]
+        },
+        "id": "ac81ec44-4f38-4bba-878f-b550189b9761",
+        "latencyMs": 1148,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an account management assistant. 
When a user wants to delete their\\naccount, generate the MDMA document for exactly this DSL intent and nothing\\nelse:\\n\\nbutton#delete-account(text=\\\"Delete My Account\\\", action=delete-account-action, variant=danger)\\n\"},{\"role\":\"user\",\"content\":\"I want to close my account and delete all my data.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Create a button component for account deletion that triggers the 'delete-account-action' with a 'danger' variant.\n```\n\n```mdma\ntype: button\nid: delete-account\ntext: \"Delete My Account\"\nonAction: delete-account-action\nvariant: danger\n```",
+          "tokenUsage": {
+            "total": 1610,
+            "prompt": 1521,
+            "completion": 89,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1148,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "962",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:46 GMT",
+                "modal-function-call-id": "fc-01KWC8EHZKBGTQ8H1GVCKNJECJ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Danger button (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are an account management assistant. When a user wants to delete their\naccount, generate the MDMA document for exactly this DSL intent and nothing\nelse:\n\nbutton#delete-account(text=\"Delete My Account\", action=delete-account-action, variant=danger)\n",
+            "request": "I want to close my account and delete all my data."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "button"
+                ]
+              }
+            },
+            {
+              "type": "contains",
+              "value": "variant: danger"
+            },
+            {
+              "type": "not-contains",
+              "value": "type: form"
+            },
+            {
+              "type": "not-contains",
+              "value": "type: table"
+            },
+            {
+              "type": "not-contains",
+              "value": "type: callout"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 11,
+        "vars": {
+          "customPrompt": "You are an account management assistant. When a user wants to delete their\naccount, generate the MDMA document for exactly this DSL intent and nothing\nelse:\n\nbutton#delete-account(text=\"Delete My Account\", action=delete-account-action, variant=danger)\n",
+          "request": "I want to close my account and delete all my data."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "962",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:46 GMT",
+              "modal-function-call-id": "fc-01KWC8EHZKBGTQ8H1GVCKNJECJ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "id: vendor-intake-q1-2026"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            }
+          ]
+        },
+        "id": "9fe2ab1e-9367-4479-aed0-ad93fdbb4eaa",
+        "latencyMs": 2777,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a vendor onboarding assistant. 
When onboarding a vendor, generate\\nthe MDMA document for exactly this DSL intent and nothing else (keep the id):\\n\\nform#vendor-intake-q1-2026[vendor-name*:t, vendor-contact-email*^:e, tax-identifier*^:t, service-category*:s{consulting|software|hardware|logistics|other}](action=vendor-intake-submitted)\\n\"},{\"role\":\"user\",\"content\":\"We need to onboard a new logistics vendor for Q1.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Onboarding form for logistics vendor: capturing name, email (PII), tax ID (PII), and service category (logistics selected by default); submits via vendor-intake-submitted.\n```\n\n```mdma\ntype: form\nid: vendor-intake-q1-2026\nfields:\n  - name: vendor-name\n    type: text\n    label: \"Vendor Name\"\n    required: true\n  - name: vendor-contact-email\n    type: email\n    label: \"Vendor Contact Email\"\n    required: true\n    sensitive: true\n  - name: tax-identifier\n    type: text\n    label: \"Tax Identification Number\"\n    required: true\n    sensitive: true\n  - name: service-category\n    type: select\n    label: \"Service Category\"\n    required: true\n    options:\n      - label: \"Consulting\"\n        value: \"consulting\"\n      - label: \"Software\"\n        value: \"software\"\n      - label: \"Hardware\"\n        value: \"hardware\"\n      - label: \"Logistics\"\n        value: \"logistics\"\n      - label: \"Other\"\n        value: \"other\"\nonSubmit: vendor-intake-submitted\n```",
+          "tokenUsage": {
+            "total": 1869,
+            "prompt": 1562,
+            "completion": 307,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2777,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1733",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:48 GMT",
+                "modal-function-call-id": "fc-01KWC8EK43WA7C9K8QS8J4792P",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Specific component id preserved (DSL custom prompt)",
+          "vars": {
+            "customPrompt": "You are a vendor onboarding assistant. When onboarding a vendor, generate\nthe MDMA document for exactly this DSL intent and nothing else (keep the id):\n\nform#vendor-intake-q1-2026[vendor-name*:t, vendor-contact-email*^:e, tax-identifier*^:t, service-category*:s{consulting|software|hardware|logistics|other}](action=vendor-intake-submitted)\n",
+            "request": "We need to onboard a new logistics vendor for Q1."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "contains",
+              "value": "id: vendor-intake-q1-2026"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 12,
+        "vars": {
+          "customPrompt": "You are a vendor onboarding assistant. When onboarding a vendor, generate\nthe MDMA document for exactly this DSL intent and nothing else (keep the id):\n\nform#vendor-intake-q1-2026[vendor-name*:t, vendor-contact-email*^:e, tax-identifier*^:t, service-category*:s{consulting|software|hardware|logistics|other}](action=vendor-intake-submitted)\n",
+          "request": "We need to onboard a new logistics vendor for Q1."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1733",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:48 GMT",
+              "modal-function-call-id": "fc-01KWC8EK43WA7C9K8QS8J4792P",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      }
+    ],
+    "stats": {
+      "successes": 13,
+      "failures": 0,
+      "errors": 0,
+      "tokenUsage": {
+        "prompt": 20012,
+        "completion": 3390,
+        "cached": 0,
+        "total": 23402,
+        "numRequests": 13,
+        "completionDetails": {
+          "reasoning": 0,
+          "acceptedPrediction": 0,
+          "rejectedPrediction": 0,
+          "cacheReadInputTokens": 0,
+          "cacheCreationInputTokens": 0
+        },
+        "assertions": {
+          "total": 0,
+          "prompt": 0,
+          "completion": 0,
+          "cached": 0,
+          "numRequests": 0,
+          "completionDetails": {
+            "reasoning": 0,
+            "acceptedPrediction": 0,
+            "rejectedPrediction": 0,
+            "cacheReadInputTokens": 0,
+            "cacheCreationInputTokens": 0
+          }
+        }
+      },
+      "durationMs": 33685,
+      "evaluationDurationMs": 33685
+    }
+  },
+  "config": {
+    "tags": {},
+    "description": "MDMA Author + Custom System Prompt Eval — own model",
+    "prompts": [
+      "file:///Users/marcinsadowski/GIT/mr-mdma/evals/own-model/prompt-custom.mjs"
+    ],
+    "providers": [
+      {
+        "id": "openai:chat:mdma-26b",
+        "config": {
+          "apiBaseUrl": "https://REDACTED.modal.run/v1",
+          "apiKey": "[REDACTED]",
+          "temperature": 1,
+          "max_tokens": 4096,
+          "min_p": 0.02,
+          "repetition_penalty": 1.1,
+          "chat_template_kwargs": {
+            "enable_thinking": false
+          }
+        }
+      }
+    ],
+    "tests": [
+      {
+        "description": "Bug report form (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a bug tracking assistant. When a user reports a bug, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#bug-report[title*:t, severity:s{critical|high|medium|low}, steps*:ta, expected:ta, actual:ta](action=bug-submitted)\n",
+          "request": "I found a bug where the login page crashes after entering my password."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 5
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 2
+            }
+          },
+          {
+            "type": "contains",
+            "value": "type: textarea"
+          }
+        ]
+      },
+      {
+        "description": "Onboarding form (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are an HR onboarding assistant. When a new hire needs to be set up,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#new-hire-info[full-name*:t, email*^:e, department:s{engineering|design|marketing|sales|operations}, start-date*:d](action=onboarding-checklist)\n",
+          "request": "We have a new hire starting in the Design department next Monday."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 4
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Feedback form + pie chart (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a customer success assistant. When asked about feedback, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#feedback-form[customer-name:t, email^:e, rating:s{1|2|3|4|5}, feedback:ta](action=feedback-submitted)\nchart#satisfaction-chart(variant=pie)\n",
+          "request": "I need to collect customer feedback for this quarter."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "chart"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 4
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pie-chart.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Expense form (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a finance assistant. When an expense needs logging, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#expense-form[amount*:n, category:s{travel|equipment|software|meals|other}, date*:d, receipt-desc:ta, receipt-number:t](action=approve-expense)\n",
+          "request": "I need to expense a $250 flight for the NYC conference."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 5
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          }
+        ]
+      },
+      {
+        "description": "IT ticket form (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are an IT helpdesk assistant. When a user reports an issue, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#it-ticket[requester-email*^:e, issue-type:s{hardware|software|network|access|other}, priority:s{low|medium|high|urgent}, description*:ta](action=submit-ticket)\n",
+          "request": "My monitor stopped working this morning and I can't do any visual design work."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 4
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Sales table + bar chart (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a sales analytics assistant. When asked for a sales report,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ntable#sales-table\nchart#sales-chart(variant=bar)\n",
+          "request": "Show me the Q4 sales performance breakdown."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "table",
+                "chart"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/table-features.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/bar-chart.mjs"
+          },
+          {
+            "type": "not-contains",
+            "value": "type: form"
+          },
+          {
+            "type": "not-contains",
+            "value": "type: button"
+          }
+        ]
+      },
+      {
+        "description": "Patient intake form, PII marking (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a medical intake assistant. When registering a patient, generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#patient-intake[full-name*^:t, dob*^:d, email^:e, phone^:t, insurance-id*^:t, chief-complaint*:ta](action=patient-registered)\n",
+          "request": "New patient walk-in needs to be registered."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 6
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pii-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 4
+            }
+          }
+        ]
+      },
+      {
+        "description": "Maintenance callout (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a system status communicator. On a maintenance event, generate\nthe MDMA document for exactly this DSL intent and nothing else (fill the\ncallout content from the user's message):\n\ncallout#maintenance-notice(variant=warning)\n",
+          "request": "We need to take the payment system offline on Saturday March 8th from 2 AM to 6 AM EST for database migration."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/callout-variant.mjs",
+            "config": {
+              "variant": "warning"
+            }
+          },
+          {
+            "type": "not-contains",
+            "value": "type: form"
+          },
+          {
+            "type": "not-contains",
+            "value": "type: button"
+          }
+        ]
+      },
+      {
+        "description": "Contract summary form (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a legal operations assistant. When a contract needs review,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#contract-summary[contract-title*:t, counterparty*:t, contract-value*:n, effective-date*:d, contract-type:s{nda|msa|sow|amendment|renewal}](action=review-checklist)\n",
+          "request": "We need to review the new SoW from Acme Corp worth $500k."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 5
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 4
+            }
+          }
+        ]
+      },
+      {
+        "description": "Job application with file fields (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a recruiting assistant. When a candidate applies, generate the\nMDMA document for exactly this DSL intent and nothing else:\n\nform#job-application[full-name*:t, email*^:e, resume*:f, portfolio:f, cover-letter:ta](action=application-submitted)\n",
+          "request": "I'd like to apply for the senior product designer role you posted."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 5
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/file-field.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      },
+      {
+        "description": "KYC identity form, all fields sensitive (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a KYC compliance assistant. When verifying a customer's identity,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#kyc-identity-form[full-legal-name*^:t, date-of-birth*^:d, passport-scan*^:f, proof-of-address*^:f](action=kyc-identity-verified)\n",
+          "request": "I need to verify the identity of a new customer applying for an account."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/exact-field-count.mjs",
+            "config": {
+              "expected": 4
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/file-field.mjs",
+            "config": {
+              "sensitive": true
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pii-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 4
+            }
+          }
+        ]
+      },
+      {
+        "description": "Danger button (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are an account management assistant. When a user wants to delete their\naccount, generate the MDMA document for exactly this DSL intent and nothing\nelse:\n\nbutton#delete-account(text=\"Delete My Account\", action=delete-account-action, variant=danger)\n",
+          "request": "I want to close my account and delete all my data."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "button"
+              ]
+            }
+          },
+          {
+            "type": "contains",
+            "value": "variant: danger"
+          },
+          {
+            "type": "not-contains",
+            "value": "type: form"
+          },
+          {
+            "type": "not-contains",
+            "value": "type: table"
+          },
+          {
+            "type": "not-contains",
+            "value": "type: callout"
+          }
+        ]
+      },
+      {
+        "description": "Specific component id preserved (DSL custom prompt)",
+        "vars": {
+          "customPrompt": "You are a vendor onboarding assistant. When onboarding a vendor, generate\nthe MDMA document for exactly this DSL intent and nothing else (keep the id):\n\nform#vendor-intake-q1-2026[vendor-name*:t, vendor-contact-email*^:e, tax-identifier*^:t, service-category*:s{consulting|software|hardware|logistics|other}](action=vendor-intake-submitted)\n",
+          "request": "We need to onboard a new logistics vendor for Q1."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "contains",
+            "value": "id: vendor-intake-q1-2026"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          }
+        ]
+      }
+    ],
+    "scenarios": [],
+    "env": {},
+    "defaultTest": {
+      "assert": [
+        {
+          "type": "javascript",
+          "value": "file://assertions/validate-mdma.mjs",
+          "config": {
+            "exclude": [
+              "flow-ordering"
+            ]
+          }
+        }
+      ],
+      "vars": {},
+      "options": {},
+      "metadata": {}
+    },
+    "outputPath": [
+      "own-model/results-custom.json"
+    ],
+    "extensions": [],
+    "metadata": {},
+    "evaluateOptions": {}
+  },
+  "shareableUrl": null,
+  "metadata": {
+    "promptfooVersion": "0.121.9",
+    "nodeVersion": "v22.22.0",
+    "platform": "darwin",
+    "arch": "arm64",
+    "exportedAt": "2026-06-30T12:35:51.175Z",
+    "evaluationCreatedAt": "2026-06-30T12:35:16.976Z"
+  }
+}
\ No newline at end of file
diff --git a/evals/own-model/results-fixer.json b/evals/own-model/results-fixer.json
new file mode 100644
index 0000000..e77e73f
--- /dev/null
+++ b/evals/own-model/results-fixer.json
@@ -0,0 +1,2861 @@
+{
+  "evalId": "eval-rB8-2026-06-30T12:37:54",
+  "results": {
+    "version": 3,
+    "timestamp": "2026-06-30T12:37:54.142Z",
+    "prompts": [
+      {
+        "raw": "function ({ vars }) {\n  const variantKey = vars.variantKey ?? 'single-block';\n  const exclude = ['thinking-block'];\n  if (variantKey !== 'flow') exclude.push('flow-ordering');\n\n  const result = validate(vars.brokenDocument, { exclude });\n  const allIssues = result.issues.filter((i) => i.severity === 'error' || i.severity === 'warning');\n\n  const fixerPrompt = buildFixerPrompt(variantKey);\n  const systemPrompt = `${buildSystemPrompt()}\\n\\n---\\n\\n${fixerPrompt}`;\n  const userMessage = buildFixerMessage(vars.brokenDocument, allIssues, {\n    conversationHistory: vars.conversationHistory ?? undefined,\n    promptContext: vars.promptContext ?? undefined,\n  });\n\n  return [\n    { role: 'system', content: `{% raw %}${systemPrompt}{% endraw %}` },\n    { role: 'user', content: `{% raw %}${userMessage}{% endraw %}` },\n  ];\n}",
+        "label": "own-model/prompt-fixer.mjs",
+        "config": {},
+        "id": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "provider": "openai:chat:mdma-26b",
+        "metrics": {
+          "score": 15,
+          "testPassCount": 15,
+          "testFailCount": 0,
+          "testErrorCount": 0,
+          "assertPassCount": 53,
+          "assertFailCount": 0,
+          "totalLatencyMs": 18223,
+          "tokenUsage": {
+            "prompt": 106601,
+            "completion": 1384,
+            "cached": 0,
+            "total": 107985,
+            "numRequests": 15,
+            "completionDetails": {
+              "reasoning": 0,
+              "acceptedPrediction": 0,
+              "rejectedPrediction": 0,
+              "cacheReadInputTokens": 0,
+              "cacheCreationInputTokens": 0
+            },
+            "assertions": {
+              "total": 0,
+              "prompt": 0,
+              "completion": 0,
+              "cached": 0,
+              "numRequests": 0,
+              "completionDetails": {
+                "reasoning": 0,
+                "acceptedPrediction": 0,
+                "rejectedPrediction": 0,
+                "cacheReadInputTokens": 0,
+                "cacheCreationInputTokens": 0
+              }
+            }
+          },
+          "namedScores": {},
+          "namedScoresCount": {},
+          "namedScoreWeights": {},
+          "cost": 0
+        }
+      }
+    ],
+    "results": [
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"action-btn\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: button\nid: action-btn\nvariant: primary\n",
+                  "hasFields": [
+                    "text"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "939aad76-1002-4c67-aef4-2c30dc5c7821",
+        "latencyMs": 1608,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 1 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #action-btn → text: text: Required\\n\\n---\\n\\n```mdma\\ntype: button\\nid: action-btn\\nvariant: primary\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: button\nid: action-btn\ntext: Continue\nvariant: primary\n```",
+          "tokenUsage": {
+            "total": 7027,
+            "prompt": 7003,
+            "completion": 24,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1608,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "707",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:54 GMT",
+                "modal-function-call-id": "fc-01KWC8JEXQVT8HRSAEVV4F67JF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes button missing required text field",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: button\nid: action-btn\nvariant: primary\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: button\nid: action-btn\nvariant: primary\n",
+                "hasFields": [
+                  "text"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 0,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: button\nid: action-btn\nvariant: primary\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "707",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:54 GMT",
+              "modal-function-call-id": "fc-01KWC8JEXQVT8HRSAEVV4F67JF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"status-notice\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: callout\nid: status-notice\nvariant: info\ntitle: System Status\n",
+                  "hasFields": [
+                    "content"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "143df824-547c-43a7-9c3b-b7d18b5ad17b",
+        "latencyMs": 669,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 1 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #status-notice → content: content: Required\\n\\n---\\n\\n```mdma\\ntype: callout\\nid: status-notice\\nvariant: info\\ntitle: System Status\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: status-notice\nvariant: info\ntitle: System Status\ncontent: The system is currently operational and performing within normal parameters.\n```",
+          "tokenUsage": {
+            "total": 7049,
+            "prompt": 7009,
+            "completion": 40,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 669,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "801",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:55 GMT",
+                "modal-function-call-id": "fc-01KWC8JG3R3YJYTF75KFTZAKCB",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes callout missing required content field",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: callout\nid: status-notice\nvariant: info\ntitle: System Status\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: callout\nid: status-notice\nvariant: info\ntitle: System Status\n",
+                "hasFields": [
+                  "content"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 1,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: callout\nid: status-notice\nvariant: info\ntitle: System Status\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "801",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:55 GMT",
+              "modal-function-call-id": "fc-01KWC8JG3R3YJYTF75KFTZAKCB",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 2 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 2 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"contact-form\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n  - name: message\n    type: textarea\n    label: Message\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "1eec0e79-c194-4acf-b800-52494ba11825",
+        "latencyMs": 1634,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 4 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #contact-form → onSubmit: onSubmit: Required\\n2. [warning] sensitive-flags #contact-form → fields[0]: Form field \\\"contact-type\\\" appears to contain PII but is missing sensitive: true\\n3. [warning] select-options #contact-form → fields[0]: Select field \\\"contact-type\\\" is missing options\\n4. [error] form-submit-action #contact-form → onSubmit: Form must have an onSubmit action\\n\\n---\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: contact-type\\n    type: select\\n    label: Contact Type\\n    required: true\\n  - name: message\\n    type: textarea\\n    label: Message\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n    sensitive: true\n    options:\n      - label: General Inquiry\n        value: inquiry\n      - label: Support Request\n        value: support\n      - label: Sales Question\n        value: sales\n  - name: message\n    type: textarea\n    label: Message\nonSubmit: form-submitted-callout\n```\n\n```mdma\ntype: callout\nid: form-submitted-callout\nvariant: success\ncontent: Thank you for reaching out. Your message has been sent successfully.\n```",
+          "tokenUsage": {
+            "total": 7283,
+            "prompt": 7130,
+            "completion": 153,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1634,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1219",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:56 GMT",
+                "modal-function-call-id": "fc-01KWC8JGSDNY7CAEYVEWZ1KNVZ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes select field missing required options array",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n  - name: message\n    type: textarea\n    label: Message\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n  - name: message\n    type: textarea\n    label: Message\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 2,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n  - name: message\n    type: textarea\n    label: Message\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1219",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:56 GMT",
+              "modal-function-call-id": "fc-01KWC8JGSDNY7CAEYVEWZ1KNVZ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "No placeholder content found in visible output",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/no-placeholder-content.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"welcome-callout\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: callout\nid: welcome-callout\nvariant: info\n",
+                  "hasFields": [
+                    "title",
+                    "content"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "cdb22d6e-c6c3-4824-a165-0586d41b8326",
+        "latencyMs": 907,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 0 issue(s) that need to be fixed:\\n\\n\\n\\n---\\n\\n```mdma\\ntype: callout\\nid: welcome-callout\\nvariant: info\\ntitle: TODO\\ncontent: Lorem ipsum dolor sit amet\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: welcome-callout\nvariant: info\ntitle: Welcome to the Portal\ncontent: We are glad to have you here. Please review the upcoming steps to complete your profile setup.\n```",
+          "tokenUsage": {
+            "total": 7048,
+            "prompt": 6997,
+            "completion": 51,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 907,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "829",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:58 GMT",
+                "modal-function-call-id": "fc-01KWC8JJJV7HJ8MHRCY7F4X8K4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes placeholder title and content in callout",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: callout\nid: welcome-callout\nvariant: info\ntitle: TODO\ncontent: Lorem ipsum dolor sit amet\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/no-placeholder-content.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: callout\nid: welcome-callout\nvariant: info\n",
+                "hasFields": [
+                  "title",
+                  "content"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 3,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: callout\nid: welcome-callout\nvariant: info\ntitle: TODO\ncontent: Lorem ipsum dolor sit amet\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "829",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:58 GMT",
+              "modal-function-call-id": "fc-01KWC8JJJV7HJ8MHRCY7F4X8K4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 2 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 2 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"contact-details\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "ac816bda-1f67-41dd-ad9a-2295a72daa63",
+        "latencyMs": 1563,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 4 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #contact-details → onSubmit: onSubmit: Required\\n2. [warning] sensitive-flags #contact-details → fields[1]: Form field \\\"email\\\" appears to contain PII but is missing sensitive: true\\n3. [warning] sensitive-flags #contact-details → fields[2]: Form field \\\"phone\\\" appears to contain PII but is missing sensitive: true\\n4. [error] form-submit-action #contact-details → onSubmit: Form must have an onSubmit action\\n\\n---\\n\\n```mdma\\ntype: form\\nid: contact-details\\nfields:\\n  - name: full-name\\n    type: text\\n    label: Full Name\\n    required: true\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n  - name: phone\\n    type: text\\n    label: Phone Number\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\nonSubmit: contact-details-submitted\n```\n\n```mdma\ntype: callout\nid: contact-details-submitted\nvariant: success\ncontent: Thank you for your message. We have received your contact details and will get back to you shortly.\n```",
+          "tokenUsage": {
+            "total": 7303,
+            "prompt": 7157,
+            "completion": 146,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1563,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1172",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:37:59 GMT",
+                "modal-function-call-id": "fc-01KWC8JKAN403NAY89GEFF2H08",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes email and phone fields missing sensitive flag",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n  - name: phone\n    type: text\n    label: Phone Number\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 4,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n  - name: phone\n    type: text\n    label: Phone Number\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1172",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:37:59 GMT",
+              "modal-function-call-id": "fc-01KWC8JKAN403NAY89GEFF2H08",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"refresh-btn\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: button\nid: refresh-btn\nonAction: refresh-stats\n",
+                  "hasFields": [
+                    "text"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "c3b7eb3c-0c83-4e89-a9ef-0eefdc2258af",
+        "latencyMs": 577,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 1 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #refresh-btn → text: text: Required\\n\\n---\\n\\n```mdma\\ntype: button\\nid: refresh-btn\\nvariant: primary\\nonAction: refresh-stats\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: button\nid: refresh-btn\ntext: Refresh Stats\nvariant: primary\nonAction: refresh-stats\n```",
+          "tokenUsage": {
+            "total": 7042,
+            "prompt": 7010,
+            "completion": 32,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 577,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "738",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:00 GMT",
+                "modal-function-call-id": "fc-01KWC8JMX4D3GAC3RGV8RSSJ7C",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes button missing required text field (preserves opaque onAction label)",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: button\nid: refresh-btn\nvariant: primary\nonAction: refresh-stats\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: button\nid: refresh-btn\nonAction: refresh-stats\n",
+                "hasFields": [
+                  "text"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 5,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: button\nid: refresh-btn\nvariant: primary\nonAction: refresh-stats\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "738",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:00 GMT",
+              "modal-function-call-id": "fc-01KWC8JMX4D3GAC3RGV8RSSJ7C",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 2 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 2 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"profile-form\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: form\nid: profile-form\n",
+                  "hasFields": [
+                    "onSubmit"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "8281e6c2-3bdb-4166-97db-b05d410e83d6",
+        "latencyMs": 1390,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 2 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #profile-form → onSubmit: onSubmit: Required\\n2. [error] form-submit-action #profile-form → onSubmit: Form must have an onSubmit action\\n\\n---\\n\\n```mdma\\ntype: form\\nid: profile-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email\\n    required: true\\n    sensitive: true\\n  - name: display-name\\n    type: text\\n    label: Display Name\\n  - name: bio\\n    type: textarea\\n    label: Bio\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: profile-form\nfields:\n  - name: email\n    type: email\n    label: Email\n    required: true\n    sensitive: true\n  - name: display-name\n    type: text\n    label: Display Name\n  - name: bio\n    type: textarea\n    label: Bio\nonSubmit: profile-updated-callout\n```\n\n```mdma\ntype: callout\nid: profile-updated-callout\nvariant: success\ncontent: Your profile has been successfully updated.\n```",
+          "tokenUsage": {
+            "total": 7210,
+            "prompt": 7087,
+            "completion": 123,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1390,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1059",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:01 GMT",
+                "modal-function-call-id": "fc-01KWC8JNFY8732SM94PD679BGX",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes form missing required onSubmit field",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: form\nid: profile-form\nfields:\n  - name: email\n    type: email\n    label: Email\n    required: true\n    sensitive: true\n  - name: display-name\n    type: text\n    label: Display Name\n  - name: bio\n    type: textarea\n    label: Bio\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: form\nid: profile-form\n",
+                "hasFields": [
+                  "onSubmit"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 6,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: profile-form\nfields:\n  - name: email\n    type: email\n    label: Email\n    required: true\n    sensitive: true\n  - name: display-name\n    type: text\n    label: Display Name\n  - name: bio\n    type: textarea\n    label: Bio\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1059",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:01 GMT",
+              "modal-function-call-id": "fc-01KWC8JNFY8732SM94PD679BGX",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"leave-approval\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: approval-gate\nid: leave-approval\ntitle: Manager Approval\nrequiredApprovers: 2\nallowedRoles:\n  - manager\n  - hr\nonApprove: leave-confirmed\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "5e4f04d6-7f9c-4fab-9b2b-897379ee4653",
+        "latencyMs": 903,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 2 issue(s) that need to be fixed:\\n\\n1. [warning] field-name-typos #leave-approval → roles: Field \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #leave-approval → approvers: Field \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n\\n---\\n\\n```mdma\\ntype: approval-gate\\nid: leave-approval\\ntitle: Manager Approval\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: leave-confirmed\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: approval-gate\nid: leave-approval\ntitle: Manager Approval\nallowedRoles:\n  - manager\n  - hr\nrequiredApprovers: 2\nonApprove: leave-confirmed\n```",
+          "tokenUsage": {
+            "total": 7134,
+            "prompt": 7084,
+            "completion": 50,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 903,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "795",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:02 GMT",
+                "modal-function-call-id": "fc-01KWC8JQ1M50K3ZEKM7ZD40W3P",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes field name typos on approval-gate (roles→allowedRoles, approvers→requiredApprovers)",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: approval-gate\nid: leave-approval\ntitle: Manager Approval\nroles:\n  - manager\n  - hr\napprovers: 2\nonApprove: leave-confirmed\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: approval-gate\nid: leave-approval\ntitle: Manager Approval\nrequiredApprovers: 2\nallowedRoles:\n  - manager\n  - hr\nonApprove: leave-confirmed\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 7,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: approval-gate\nid: leave-approval\ntitle: Manager Approval\nroles:\n  - manager\n  - hr\napprovers: 2\nonApprove: leave-confirmed\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "795",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:02 GMT",
+              "modal-function-call-id": "fc-01KWC8JQ1M50K3ZEKM7ZD40W3P",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "icontains",
+                "value": "product:"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"sales-table\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product: Widget A\n    revenue: 50000\n    units: 120\n  - product: Widget B\n    revenue: 32000\n    units: 85\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "a23ec8ce-3dd9-42fe-9d5d-c293d0eb46c5",
+        "latencyMs": 1222,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 6 issue(s) that need to be fixed:\\n\\n1. [warning] table-data-keys #sales-table → data[0].product_name: Data key \\\"product_name\\\" does not match any column (defined columns: product, revenue, units)\\n2. [warning] table-data-keys #sales-table → data[0].total_revenue: Data key \\\"total_revenue\\\" does not match any column (defined columns: product, revenue, units)\\n3. [warning] table-data-keys #sales-table → data[0].quantity: Data key \\\"quantity\\\" does not match any column (defined columns: product, revenue, units)\\n4. [warning] table-data-keys #sales-table → columns: Column \\\"product\\\" has no matching keys in any data row\\n5. [warning] table-data-keys #sales-table → columns: Column \\\"revenue\\\" has no matching keys in any data row\\n6. [warning] table-data-keys #sales-table → columns: Column \\\"units\\\" has no matching keys in any data row\\n\\n---\\n\\n```mdma\\ntype: table\\nid: sales-table\\ncolumns:\\n  - key: product\\n    header: Product\\n  - key: revenue\\n    header: Revenue\\n  - key: units\\n    header: Units Sold\\ndata:\\n  - product_name: Widget A\\n    total_revenue: 50000\\n    quantity: 120\\n  - product_name: Widget B\\n    total_revenue: 32000\\n    quantity: 85\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product: Widget A\n    revenue: 50000\n    units: 120\n  - product: Widget B\n    revenue: 32000\n    units: 85\n```",
+          "tokenUsage": {
+            "total": 7405,
+            "prompt": 7300,
+            "completion": 105,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1222,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "926",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:03 GMT",
+                "modal-function-call-id": "fc-01KWC8JQSH1A22X2SA8VN5S587",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes table data key mismatch (data keys don't match column keys)",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product_name: Widget A\n    total_revenue: 50000\n    quantity: 120\n  - product_name: Widget B\n    total_revenue: 32000\n    quantity: 85\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "icontains",
+              "value": "product:"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product: Widget A\n    revenue: 50000\n    units: 120\n  - product: Widget B\n    revenue: 32000\n    units: 85\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 8,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product_name: Widget A\n    total_revenue: 50000\n    quantity: 120\n  - product_name: Widget B\n    total_revenue: 32000\n    quantity: 85\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "926",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:03 GMT",
+              "modal-function-call-id": "fc-01KWC8JQSH1A22X2SA8VN5S587",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "icontains",
+                "value": "xAxis: Month"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"sales-chart\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: chart\nid: sales-chart\nvariant: bar\nxAxis: Month\nyAxis:\n  - Revenue\n  - Costs\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "c548db7a-72de-4965-b183-6cd236588e36",
+        "latencyMs": 1098,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 3 issue(s) that need to be fixed:\\n\\n1. [warning] chart-validation #sales-chart → xAxis: xAxis \\\"Date\\\" does not match any CSV header (available: Month, Revenue, Costs)\\n2. [warning] chart-validation #sales-chart → yAxis: yAxis \\\"Profit\\\" does not match any CSV header (available: Month, Revenue, Costs)\\n3. [warning] chart-validation #sales-chart → yAxis: yAxis \\\"Expenses\\\" does not match any CSV header (available: Month, Revenue, Costs)\\n\\n---\\n\\n```mdma\\ntype: chart\\nid: sales-chart\\nvariant: bar\\ndata: |\\n  Month,Revenue,Costs\\n  Jan,10000,8000\\n  Feb,12000,9000\\n  Mar,15000,11000\\nxAxis: Date\\nyAxis:\\n  - Profit\\n  - Expenses\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: chart\nid: sales-chart\nvariant: bar\ndata: |\n  Month,Revenue,Costs\n  Jan,10000,8000\n  Feb,12000,9000\n  Mar,15000,11000\nxAxis: Month\nyAxis:\n  - Revenue\n  - Costs\n```",
+          "tokenUsage": {
+            "total": 7252,
+            "prompt": 7162,
+            "completion": 90,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1098,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "820",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:04 GMT",
+                "modal-function-call-id": "fc-01KWC8JS4JQAXFN3SRBHV71FXF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes chart axis mismatch (axes don't match data columns)",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: chart\nid: sales-chart\nvariant: bar\ndata: |\n  Month,Revenue,Costs\n  Jan,10000,8000\n  Feb,12000,9000\n  Mar,15000,11000\nxAxis: Date\nyAxis:\n  - Profit\n  - Expenses\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "icontains",
+              "value": "xAxis: Month"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: chart\nid: sales-chart\nvariant: bar\nxAxis: Month\nyAxis:\n  - Revenue\n  - Costs\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 9,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: chart\nid: sales-chart\nvariant: bar\ndata: |\n  Month,Revenue,Costs\n  Jan,10000,8000\n  Feb,12000,9000\n  Mar,15000,11000\nxAxis: Date\nyAxis:\n  - Profit\n  - Expenses\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "820",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:04 GMT",
+              "modal-function-call-id": "fc-01KWC8JS4JQAXFN3SRBHV71FXF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 1 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"patient-form\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n  - name: ssn\n    type: text\n    label: Social Security Number\n    sensitive: true\n  - name: address\n    type: textarea\n    label: Home Address\n    sensitive: true\nonSubmit: registration-complete\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "bc85b5b3-b894-4dc6-bf0f-426f234bdb20",
+        "latencyMs": 1541,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 4 issue(s) that need to be fixed:\\n\\n1. [warning] sensitive-flags #patient-form → fields[1]: Form field \\\"email\\\" appears to contain PII but is missing sensitive: true\\n2. [warning] sensitive-flags #patient-form → fields[2]: Form field \\\"phone\\\" appears to contain PII but is missing sensitive: true\\n3. [warning] sensitive-flags #patient-form → fields[3]: Form field \\\"ssn\\\" appears to contain PII but is missing sensitive: true\\n4. [warning] sensitive-flags #patient-form → fields[4]: Form field \\\"address\\\" appears to contain PII but is missing sensitive: true\\n\\n---\\n\\n```mdma\\ntype: form\\nid: patient-form\\nfields:\\n  - name: full-name\\n    type: text\\n    label: Full Name\\n    required: true\\n  - name: email\\n    type: email\\n    label: Email Address\\n  - name: phone\\n    type: text\\n    label: Phone Number\\n  - name: ssn\\n    type: text\\n    label: Social Security Number\\n  - name: address\\n    type: textarea\\n    label: Home Address\\nonSubmit: registration-complete\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n  - name: ssn\n    type: text\n    label: Social Security Number\n    sensitive: true\n  - name: address\n    type: textarea\n    label: Home Address\n    sensitive: true\nonSubmit: registration-complete\n```",
+          "tokenUsage": {
+            "total": 7358,
+            "prompt": 7219,
+            "completion": 139,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1541,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1127",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:06 GMT",
+                "modal-function-call-id": "fc-01KWC8JT8FCPKVHV98MA51W7PQ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes missing PII sensitive flags on form fields",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n  - name: phone\n    type: text\n    label: Phone Number\n  - name: ssn\n    type: text\n    label: Social Security Number\n  - name: address\n    type: textarea\n    label: Home Address\nonSubmit: registration-complete\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n  - name: ssn\n    type: text\n    label: Social Security Number\n    sensitive: true\n  - name: address\n    type: textarea\n    label: Home Address\n    sensitive: true\nonSubmit: registration-complete\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 10,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n  - name: phone\n    type: text\n    label: Phone Number\n  - name: ssn\n    type: text\n    label: Social Security Number\n  - name: address\n    type: textarea\n    label: Home Address\nonSubmit: registration-complete\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1127",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:06 GMT",
+              "modal-function-call-id": "fc-01KWC8JT8FCPKVHV98MA51W7PQ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"patient-records\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n    sensitive: true\n  - key: email\n    header: Email\n    sensitive: true\n  - key: phone\n    header: Phone\n    sensitive: true\n  - key: dob\n    header: Date of Birth\n    sensitive: true\n"
+                }
+              }
+            }
+          ]
+        },
+        "id": "a6119703-40e0-4414-9e6a-f3c0b715864c",
+        "latencyMs": 1479,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 4 issue(s) that need to be fixed:\\n\\n1. [warning] sensitive-flags #patient-records → columns[0]: Table column \\\"name\\\" appears to contain PII but is missing sensitive: true\\n2. [warning] sensitive-flags #patient-records → columns[1]: Table column \\\"email\\\" appears to contain PII but is missing sensitive: true\\n3. [warning] sensitive-flags #patient-records → columns[2]: Table column \\\"phone\\\" appears to contain PII but is missing sensitive: true\\n4. [warning] sensitive-flags #patient-records → columns[3]: Table column \\\"dob\\\" appears to contain PII but is missing sensitive: true\\n\\n---\\n\\n```mdma\\ntype: table\\nid: patient-records\\ncolumns:\\n  - key: name\\n    header: Patient Name\\n  - key: email\\n    header: Email\\n  - key: phone\\n    header: Phone\\n  - key: dob\\n    header: Date of Birth\\ndata:\\n  - name: Jane Doe\\n    email: jane@example.com\\n    phone: 555-0101\\n    dob: 1990-01-15\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n    sensitive: true\n  - key: email\n    header: Email\n    sensitive: true\n  - key: phone\n    header: Phone\n    sensitive: true\n  - key: dob\n    header: Date of Birth\n    sensitive: true\ndata:\n  - name: Jane Doe\n    email: jane@example.com\n    phone: 555-0101\n    dob: 1990-01-15\n```",
+          "tokenUsage": {
+            "total": 7345,
+            "prompt": 7212,
+            "completion": 133,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1479,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1022",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:07 GMT",
+                "modal-function-call-id": "fc-01KWC8JW003BTN4F9BMZ6ZRBH9",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes missing PII sensitive flags on table columns",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n  - key: email\n    header: Email\n  - key: phone\n    header: Phone\n  - key: dob\n    header: Date of Birth\ndata:\n  - name: Jane Doe\n    email: jane@example.com\n    phone: 555-0101\n    dob: 1990-01-15\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n    sensitive: true\n  - key: email\n    header: Email\n    sensitive: true\n  - key: phone\n    header: Phone\n    sensitive: true\n  - key: dob\n    header: Date of Birth\n    sensitive: true\n"
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 11,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n  - key: email\n    header: Email\n  - key: phone\n    header: Phone\n  - key: dob\n    header: Date of Birth\ndata:\n  - name: Jane Doe\n    email: jane@example.com\n    phone: 555-0101\n    dob: 1990-01-15\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1022",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:07 GMT",
+              "modal-function-call-id": "fc-01KWC8JW003BTN4F9BMZ6ZRBH9",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "1 unique kebab-case IDs",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/unique-kebab-ids.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "No placeholder content found in visible output",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/no-placeholder-content.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Component \"employee-form\" matches expected block",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-contains-component.mjs",
+                "config": {
+                  "expected": "type: form\nid: employee-form\n",
+                  "hasFields": [
+                    "onSubmit"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "69c4da50-4944-42ca-9d8a-2e67ea32f38e",
+        "latencyMs": 1767,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 4 issue(s) that need to be fixed:\\n\\n1. [error] schema-conformance #employee_form → fields.1.label: fields.1.label: Required\\n2. [warning] id-format #employee_form → id: ID \\\"employee_form\\\" is not kebab-case. Expected format: \\\"my-component-id\\\"\\n3. [warning] sensitive-flags #employee_form → fields[1]: Form field \\\"email\\\" appears to contain PII but is missing sensitive: true\\n4. [warning] select-options #employee_form → fields[2]: Select field \\\"department\\\" is missing options\\n\\n---\\n\\n```mdma\\ntype: form\\nid: employee_form\\nfields:\\n  - name: full_name\\n    type: text\\n    label: TODO\\n    required: true\\n  - name: email\\n    type: email\\n  - name: department\\n    type: select\\n    label: Department\\n  - name: start_date\\n    type: date\\n    label: Start Date\\nonSubmit: missing-handler\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: employee-form\nfields:\n  - name: full_name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: department\n    type: select\n    label: Department\n    options:\n      - label: Engineering\n        value: engineering\n      - label: Sales\n        value: sales\n      - label: Marketing\n        value: marketing\n      - label: Human Resources\n        value: hr\n  - name: start_date\n    type: date\n    label: Start Date\nonSubmit: submit-employee-form\n```",
+          "tokenUsage": {
+            "total": 7345,
+            "prompt": 7183,
+            "completion": 162,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1767,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1237",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:09 GMT",
+                "modal-function-call-id": "fc-01KWC8JXG6FBVCG4G88PYP0B41",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes many issues on a single form (ID format, placeholder, PII, select)",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: form\nid: employee_form\nfields:\n  - name: full_name\n    type: text\n    label: TODO\n    required: true\n  - name: email\n    type: email\n  - name: department\n    type: select\n    label: Department\n  - name: start_date\n    type: date\n    label: Start Date\nonSubmit: missing-handler\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/unique-kebab-ids.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/no-placeholder-content.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-contains-component.mjs",
+              "config": {
+                "expected": "type: form\nid: employee-form\n",
+                "hasFields": [
+                  "onSubmit"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 12,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: employee_form\nfields:\n  - name: full_name\n    type: text\n    label: TODO\n    required: true\n  - name: email\n    type: email\n  - name: department\n    type: select\n    label: Department\n  - name: start_date\n    type: date\n    label: Start Date\nonSubmit: missing-handler\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1237",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:09 GMT",
+              "modal-function-call-id": "fc-01KWC8JXG6FBVCG4G88PYP0B41",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "No placeholder content found in visible output",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/no-placeholder-content.mjs"
+              }
+            }
+          ]
+        },
+        "id": "731f1702-41f2-4fc7-957c-1815809ca177",
+        "latencyMs": 1098,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 0 issue(s) that need to be fixed:\\n\\n\\n\\n---\\n\\n```mdma\\ntype: form\\nid: project-form\\nfields:\\n  - name: project-name\\n    type: text\\n    label: \\\"TODO: add label\\\"\\n    required: true\\n  - name: description\\n    type: textarea\\n    label: \\\"...\\\"\\n  - name: team-size\\n    type: number\\n    label: FIXME\\nonSubmit: project-summary\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: project-form\nfields:\n  - name: project-name\n    type: text\n    label: Project Name\n    required: true\n  - name: description\n    type: textarea\n    label: Project Description\n  - name: team-size\n    type: number\n    label: Team Size\nonSubmit: project-summary\n```",
+          "tokenUsage": {
+            "total": 7136,
+            "prompt": 7051,
+            "completion": 85,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1098,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "930",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:11 GMT",
+                "modal-function-call-id": "fc-01KWC8JZ97XCPEA9WNN48DMTXW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes placeholder content in form field labels",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: form\nid: project-form\nfields:\n  - name: project-name\n    type: text\n    label: \"TODO: add label\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"...\"\n  - name: team-size\n    type: number\n    label: FIXME\nonSubmit: project-summary\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/no-placeholder-content.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 13,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: project-form\nfields:\n  - name: project-name\n    type: text\n    label: \"TODO: add label\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"...\"\n  - name: team-size\n    type: number\n    label: FIXME\nonSubmit: project-summary\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "930",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:11 GMT",
+              "modal-function-call-id": "fc-01KWC8JZ97XCPEA9WNN48DMTXW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer resolved all errors (0 warnings, 0 info, 1 blocks)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-resolves-errors.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Fixer preserved 1 mdma block(s) (min: 1)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/fixer-preserves-components.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "No placeholder content found in visible output",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/no-placeholder-content.mjs"
+              }
+            }
+          ]
+        },
+        "id": "82aac01a-070f-4f93-b7fe-1416509256aa",
+        "latencyMs": 767,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an expert MDMA document author. MDMA (Markdown Document with Mounted Applications) extends standard Markdown with interactive components defined in fenced code blocks using the `mdma` language tag. Think before you generate content, and ensure it adheres to the MDMA format and authoring rules.\\n\\nCRITICAL: Your output IS the Markdown document — write headings, paragraphs, and ```mdma blocks directly. NEVER wrap your response in ```markdown code fences. Your response is already rendered as Markdown.\\n\\n## Document Format\\n\\nAn MDMA document is a standard Markdown file that contains one or more interactive component blocks. Each component block is a YAML snippet inside a fenced code block tagged with `mdma`. Here is an example of what your output should look like — note there are NO outer ```markdown fences:\\n\\n# My Document Title\\n\\nSome regular Markdown content here.\\n\\n```mdma\\ntype: form\\nid: contact-form\\nfields:\\n  - name: email\\n    type: email\\n    label: Email Address\\n    required: true\\n```\\n\\nMore Markdown content can follow.\\n\\n## Component Types\\n\\nMDMA supports 9 component types. Every component shares these base fields:\\n\\n- **id** (string, required) — Unique identifier within the document\\n- **type** (string, required) — Component type name\\n- **label** (string, optional) — Display label\\n- **sensitive** (boolean, default: false) — If true, values are redacted in logs\\n- **disabled** (boolean | binding, default: false) — accepts `true`, `false`, or a quoted binding like `\\\"{{checklist.completed}}\\\"`\\n- **visible** (boolean | binding, default: true) — accepts `true`, `false`, or a quoted binding like `\\\"{{form.field}}\\\"`\\n- **meta** (object, optional) — Arbitrary metadata\\n\\n### 1. 
form\\n\\nCollects user input via structured fields.\\n\\n```mdma\\ntype: form\\nid: <unique-id>\\nfields:\\n  - name: <field-name>           # required, string\\n    type: text | number | email | date | select | checkbox | textarea | file\\n    label: <display-label>       # required, string\\n    required: true | false       # default: false\\n    sensitive: true | false      # default: false — set true for PII\\n    defaultValue: <any>          # optional\\n    options:                     # required when type is \\\"select\\\"\\n      - label: <label>\\n        value: <value>\\n    validation:                  # optional\\n      pattern: <regex>\\n      min: <number>\\n      max: <number>\\n      message: <error-message>\\n    bind: \\\"{{variable.path}}\\\"    # optional binding\\nonSubmit: <action-id>            # required — action triggered on submit\\n```\\n\\n### 2. button\\n\\nTriggers an action when clicked.\\n\\n```mdma\\ntype: button\\nid: <unique-id>\\ntext: <button-label>             # required, string\\nvariant: primary | secondary | danger | ghost   # default: primary\\nonAction: <action-id>           # required — action triggered on click\\nconfirm:                         # optional confirmation dialog\\n  title: <dialog-title>\\n  message: <dialog-message>\\n  confirmText: <confirm-label>   # default: \\\"Confirm\\\"\\n  cancelText: <cancel-label>     # default: \\\"Cancel\\\"\\n```\\n\\n### 3. tasklist\\n\\nA checklist of items that can be individually checked off.\\n\\n```mdma\\ntype: tasklist\\nid: <unique-id>\\nitems:\\n  - id: <item-id>               # required, unique within tasklist\\n    text: <item-description>    # required, string\\n    checked: true | false       # default: false\\n    required: true | false      # default: false\\n    bind: \\\"{{variable.path}}\\\"   # optional binding\\nonComplete: <action-id>         # optional — triggered when all items checked\\n```\\n\\n### 4. 
table\\n\\nDisplays tabular data with optional sorting, filtering, and pagination.\\n\\n```mdma\\ntype: table\\nid: <unique-id>\\ncolumns:\\n  - key: <field-key>            # required, string\\n    header: <column-header>     # required, string\\n    sortable: true | false      # default: false\\n    sensitive: true | false     # default: false\\n    width: <css-width>          # optional, e.g. \\\"200px\\\"\\ndata:                           # array of row objects OR a binding\\n  - { key1: value1, key2: value2 }\\n# OR: data: \\\"{{variable.path}}\\\"\\nsortable: true | false          # default: false (table-level)\\nfilterable: true | false        # default: false\\npageSize: <number>              # optional, positive integer\\n```\\n\\n### 5. callout\\n\\nDisplays a highlighted message block.\\n\\n```mdma\\ntype: callout\\nid: <unique-id>\\nvariant: info | warning | error | success   # default: info\\ntitle: <optional-title>\\ncontent: <message-text>          # required, string\\ndismissible: true | false        # default: false\\n```\\n\\n### 6. approval-gate\\n\\nBlocks workflow progression until required approvals are received.\\n\\n```mdma\\ntype: approval-gate\\nid: <unique-id>\\ntitle: <gate-title>              # required, string\\ndescription: <details>           # optional\\nrequiredApprovers: <number>      # default: 1, positive integer\\nallowedRoles:                    # optional — restrict who can approve\\n  - <role-name>\\nonApprove: <action-id>          # optional\\nonDeny: <action-id>             # optional\\nrequireReason: true | false      # default: false — require reason on denial\\n```\\n\\n### 7. 
webhook\\n\\nMakes an HTTP request when triggered by an action.\\n\\n```mdma\\ntype: webhook\\nid: <unique-id>\\nurl: <endpoint-url>              # required, valid URL or binding\\nmethod: GET | POST | PUT | PATCH | DELETE   # default: POST\\nheaders:                         # optional, key-value pairs\\n  Content-Type: application/json\\nbody:                            # optional, object or binding\\n  key: value\\ntrigger: <action-id>            # required — action ID that triggers this webhook\\nretries: <0-5>                  # default: 0\\ntimeout: <milliseconds>         # default: 30000\\n```\\n\\n### 8. chart\\n\\nDisplays data as a visual chart. Data is provided as a compact CSV-like multiline string where the first row contains column headers and subsequent rows contain comma-separated values.\\n\\n```mdma\\ntype: chart\\nid: <unique-id>\\nvariant: line | bar | area | pie            # default: line\\nlabel: <chart-title>                         # optional\\ndata: |                                      # required — CSV multiline string or binding\\n  <col1>, <col2>, <col3>\\n  <val1>, <val2>, <val3>\\n  <val1>, <val2>, <val3>\\nxAxis: <column-name>                         # optional — column for x-axis (default: first column)\\nyAxis: <column-name> | [<col1>, <col2>]      # optional — column(s) for y-axis (default: all numeric non-xAxis columns)\\ncolors:                                      # optional — custom color palette\\n  - \\\"<hex-color>\\\"\\nshowLegend: true | false                     # default: true\\nshowGrid: true | false                       # default: true\\nheight: <number>                             # default: 300 (pixels)\\nstacked: true | false                        # default: false (for bar/area)\\n```\\n\\n**Data format rules:**\\n- First line of `data` = column headers (comma-separated, trimmed)\\n- Subsequent lines = data rows (comma-separated, trimmed)\\n- Numeric values are auto-detected and coerced\\n- Use YAML `|` block 
scalar for multi-line data\\n\\n**Example — line chart:**\\n```mdma\\ntype: chart\\nid: revenue-chart\\nvariant: line\\nlabel: Revenue Trend\\ndata: |\\n  month, revenue, costs\\n  Jan, 45000, 32000\\n  Feb, 52000, 35000\\n  Mar, 48000, 31000\\n  Apr, 61000, 38000\\nxAxis: month\\n```\\n\\n**Example — pie chart:**\\n```mdma\\ntype: chart\\nid: market-share\\nvariant: pie\\nlabel: Market Share\\ndata: |\\n  company, share\\n  Ours, 35\\n  Competitor A, 25\\n  Competitor B, 20\\n  Others, 20\\nxAxis: company\\n```\\n\\n### 9. thinking\\n\\nDisplays a collapsible block that shows AI reasoning or thinking process. Useful for transparency about how the AI arrived at conclusions or recommendations.\\n\\n```mdma\\ntype: thinking\\nid: <unique-id>\\nlabel: <header-text>              # optional — default: \\\"Thinking\\\"\\ncontent: |                        # required — free-form reasoning text\\n  <reasoning text>\\nstatus: thinking | done           # default: done — \\\"thinking\\\" shows pulsing animation\\ncollapsed: true | false           # default: true — starts collapsed\\n```\\n\\n- `content`: free-form reasoning text — use YAML `|` block scalar for multi-line\\n- `collapsed: true` by default — thinking is supplementary information\\n- `status: thinking` shows a pulsing animation (use during streaming), `done` when complete\\n\\n**Example — analysis reasoning:**\\n```mdma\\ntype: thinking\\nid: analysis-reasoning\\nlabel: Analyzing revenue data...\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  First, I examined the quarterly revenue trends.\\n  Q3 shows a 15% dip which correlates with seasonal patterns.\\n  However, year-over-year growth remains strong at 23%.\\n  Recommendation: maintain current strategy with Q3 adjustments.\\n```\\n\\n## Binding Syntax\\n\\nUse `{{variable.path}}` to create dynamic bindings between components. 
Bindings must:\\n- Start with `{{` and end with `}}`\\n- Contain ONLY a dot-notation path starting with a letter or underscore — no operators (no `==`, `!=`, `||`, `&&`, `!`), no quotes, no spaces, no expressions\\n- Reference existing component IDs or context variables\\n\\nExamples:\\n- `{{contact_form.email}}` — bind to the email field of a form\\n- `{{user.name}}` — bind to a context variable\\n\\n**IMPORTANT — binding values on `visible` and `disabled`:**\\n\\nThe base fields `visible` and `disabled` accept either a boolean OR a binding string. When using a binding, the value MUST be a double-quoted string containing `{{...}}`. This is the ONLY correct syntax:\\n\\n```mdma\\ntype: button\\nid: get-started-btn\\ntext: \\\"Get Started\\\"\\ndisabled: \\\"{{onboarding-checklist.completed}}\\\"\\n```\\n\\n```mdma\\ntype: callout\\nid: notification-info\\nvariant: info\\ncontent: \\\"You will receive notifications.\\\"\\nvisible: \\\"{{settings-form.notifications-enabled}}\\\"\\n```\\n\\nCommon mistakes to AVOID:\\n- ❌ `disabled: {{foo.bar}}` — missing quotes, YAML will break\\n- ❌ `disabled: true` when a binding was requested — always reproduce the exact binding\\n- ❌ Omitting `visible` or `disabled` when the blueprint includes it — copy it verbatim\\n- NEVER write `visible: \\\"\\\\\\\"{{foo.bar}}\\\\\\\"\\\"` — do not put literal `\\\"` characters inside the binding string. The YAML quotes are the ONLY quotes; the resolved value must start with `{{` and end with `}}`.\\n- NEVER use comparison or logical operators inside a binding (e.g. `visible: \\\"{{form.severity == 'p0' || form.severity == 'p1'}}\\\"`). Bindings are dot-paths only. If the requested behavior needs conditional logic, bind to a dedicated boolean field (e.g. 
add a `is-critical: checkbox` field and reference `\\\"{{form.is-critical}}\\\"`), or omit `visible` and let the component always render.\\n\\nWhen a user request includes `visible` or `disabled` with a `{{}}` binding, you MUST reproduce it exactly as a quoted string.\\n\\n## Authoring Rules\\n\\n1. **Unique IDs** — Every component `id` must be unique within the document. Use descriptive kebab-case names (e.g., `employee-onboarding-form`, `submit-btn`).\\n2. **Sensitive data** — Set `sensitive: true` on any field or column that contains PII (personally identifiable information) such as email addresses, phone numbers, SSNs, addresses, or financial data.\\n3. **Required fields** — Mark form fields as `required: true` when the workflow cannot proceed without them.\\n4. **Action labels** — Every `type: form` MUST include an `onSubmit` field. Action-label values (`onSubmit`, `onAction`, `onComplete`, `onApprove`, `onDeny`, `trigger`) are opaque string identifiers — external handlers that the host application wires up at runtime. They do NOT need to match a component in the document. Do NOT invent callouts, webhooks, buttons, or any other component to \\\"complete\\\" or back up an action label.\\n5. **Binding validity** — Every `{{binding}}` must reference a valid source. Do not leave unresolved bindings.\\n6. **Minimal components** — Only include components that are necessary for the workflow. Avoid empty or placeholder components.\\n7. **YAML correctness** — Ensure all YAML in mdma blocks is valid and properly indented. Always wrap string values in double quotes if they contain a colon followed by a space (`: `), e.g. `label: \\\"Step 1: Enter your info\\\"`.\\n8. **Always include thinking** — When generating MDMA components, ALWAYS include a `thinking` block BEFORE the main content to show your reasoning process. Use `status: done` and `collapsed: true`.\\n9. 
**Never expose MDMA internals to the user** — Do NOT mention thinking blocks, sensitive flags, bindings, component IDs, YAML structure, or any other MDMA implementation details in your visible Markdown text. The user should see a natural, helpful response — not commentary about how the document is built. All reasoning belongs inside the `thinking` block, not in the prose. Never write things like \\\"I included a thinking block\\\" or \\\"the email field is marked as sensitive\\\".\\n10. **Blueprint fidelity** — When the user provides an exact component structure, reproduce EVERY field verbatim, including `visible`, `disabled`, `onComplete`, `onAction`, and binding expressions. Never omit fields, never simplify bindings, never substitute `true`/`false` for a `\\\"{{...}}\\\"` binding. If the blueprint says `disabled: \\\"{{onboarding-checklist.completed}}\\\"`, your output must contain that exact line. If the blueprint says `visible: \\\"{{settings-form.notifications-enabled}}\\\"`, your output must contain that exact line.\\n11. **One interactive component per message** — Each response must contain at most one **interactive** component: `form`, `button`, `webhook`, `approval-gate`, or `tasklist`. Non-interactive components (`callout`, `table`, `chart`, `thinking`) may appear alongside it freely. For multi-step workflows — where the user needs a form, then an approval gate, then a webhook — generate only the current step and tell the user what comes next. 
Never collapse multiple interactive steps into a single message.\\n\\n## Self-Check Checklist\\n\\nBefore finalizing an MDMA document, verify:\\n\\n- [ ] Every component has a unique `id`\\n- [ ] All PII fields have `sensitive: true`\\n- [ ] All `{{bindings}}` reference valid sources\\n- [ ] Required form fields are marked `required: true`\\n- [ ] Every `type: form` has an `onSubmit` field (an opaque handler label, not a component reference)\\n- [ ] No components were invented to back up `onSubmit`/`onAction`/`onApprove`/etc. labels\\n- [ ] Select fields include an `options` array\\n- [ ] YAML syntax is valid in all mdma blocks\\n- [ ] Table `data` matches the declared `columns` keys\\n- [ ] Approval gates have at least one approver configured\\n- [ ] Webhook URLs are valid or use binding syntax\\n- [ ] All `visible` and `disabled` bindings are double-quoted strings: `\\\"{{component.field}}\\\"`\\n- [ ] Response contains at most one interactive component (`form`, `button`, `webhook`, `approval-gate`, `tasklist`)\\n\\n\\n---\\n\\nYou are an MDMA document fixer. You receive a Markdown document containing ```mdma component blocks along with a list of validation errors that could NOT be auto-fixed. Your job is to output a corrected version of the entire document that resolves every listed issue.\\n\\n## Rules\\n\\n1. **Fix every listed issue.** Each error includes a rule ID, component ID, field, and description. Address them all.\\n2. **Preserve everything else.** Do not change parts of the document that are not related to the reported errors. Keep all headings, paragraphs, and working components exactly as they are.\\n3. **Output the full document.** Return the complete corrected Markdown — not just the changed blocks. The output must be a valid MDMA document ready to render.\\n4. 
**Follow MDMA conventions:**\\n   - IDs must be unique and kebab-case\\n   - PII fields must have `sensitive: true`\\n   - Bindings use `{{component-id.field}}` syntax\\n   - Select fields must have `options` defined\\n   - Action targets (`onSubmit`, `onAction`, `trigger`, etc.) must reference existing component IDs\\n   - Every ```mdma block contains exactly one component in YAML\\n5. **Do NOT wrap your response in an outer code fence.** Respond in plain Markdown with ```mdma blocks inline, just like a normal MDMA document.\\n6. **Do NOT add explanations or commentary.** Output only the fixed document. Do NOT introduce a `thinking` component to explain your reasoning, narrate the fix, or describe the workflow — `thinking` blocks are not commentary, and you must not invent one. Only keep a `thinking` block if it was present in the input.\\n7. **Do NOT introduce new errors.** Every component you output must be valid. Use real URLs (e.g. `https://api.example.com/endpoint`), real labels, and real content. Never output placeholder or dummy values.\\n8. **Replace ALL placeholder text.** If any field contains \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", \\\"Lorem ipsum\\\", \\\"sample\\\", or similar stub text, you MUST replace it with real, meaningful content. 
This is mandatory — do not keep any placeholder text in your output.\\n\\n## Prompt Compliance\\n\\nWhen **Original Prompt Requirements** are provided, you MUST ensure the fixed document complies with them:\\n- Use the exact component IDs specified in the prompt\\n- Include the exact field names, types, and labels the prompt requires\\n- Use the correct select options, approval roles, webhook URLs, etc.\\n- If the original document used wrong names/IDs that differ from the prompt, fix them to match the prompt\\n- The prompt requirements take precedence over whatever the original document contained\\n\\n## Structure & YAML Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Duplicate ID` | RENAME one of the duplicates to a new unique kebab-case ID — keep BOTH components in the output. Pick a name that reflects the component's role (e.g. duplicate `employee_form` on a button → rename the button to `employee-form-submit`). Never delete a component to resolve a duplicate-ID error. |\\n| `ID is not kebab-case` | Convert to kebab-case: `myForm` → `my-form`, `user_table` → `user-table` |\\n| `Unknown component type` | Change to a valid type: form, button, table, callout, tasklist, approval-gate, webhook, chart, thinking |\\n| `text: Required` | Add a `text` field with a human-readable button label |\\n| `content: Required` | Add a `content` field with meaningful text |\\n| `Missing table headers` | Add `header` to each column, derived from `key` (e.g. 
`first_name` → `First Name`) |\\n| `Missing form labels` | Add `label` to each field, derived from `name` |\\n\\n### Duplicate ID example\\n\\nInput — `form` and `button` share the id `employee_form` (4 blocks total):\\n\\n```text\\nform#employee_form, tasklist#onboarding-tasks, button#employee_form, webhook#notify-hr\\n```\\n\\nOutput — rename the button, keep all 4 blocks:\\n\\n```text\\nform#employee-form, tasklist#onboarding-tasks, button#employee-form-submit, webhook#notify-hr\\n```\\n\\n## Binding & Reference Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Binding must be wrapped in {{ }}` | Wrap the bare path in double braces AND quote it: `bind: \\\"{{form.field}}\\\"`. This applies to ANY field that accepts bindings: `bind`, `disabled`, `visible`, `data`. ALWAYS use the format `\\\"{{path}}\\\"` with double braces and quotes. |\\n| `Empty binding expression` | The value is `{{ }}` or `{{}}` which is meaningless. Replace it with a valid binding path like `\\\"{{component.field}}\\\"` or remove the `bind` property entirely. |\\n| `Cross-reference does not match any component ID` | Fix the target to reference an existing component ID in the document |\\n| `component not found in document` | The binding references a non-existent component. Fix the component ID in the binding path. |\\n| `form has no field named` | The binding references a field that doesn't exist on the form. Fix the field name to match an actual field. 
|\\n\\n## PII & Sensitive Data Fixes\\n\\nFields containing PII (email, phone, SSN, address, card numbers, DOB, medical data) MUST have `sensitive: true`.\\n\\nCheck both:\\n- Form fields: add `sensitive: true` to the field object\\n- Table columns: add `sensitive: true` to the column object\\n\\nAlso check for fields that should be `required: true` — names, emails, titles are typically required.\\n\\n## Form-Specific Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Missing options on select field` | Add an `options` array with `{label, value}` objects. Generate realistic options for the field context. |\\n| `field is likely a typo` | Rename the field to the suggested correct name (e.g. `onClick` → `onAction`, `submit` → `onSubmit`) |\\n| `placeholder content` | Replace placeholder text like \\\"TODO\\\", \\\"TBD\\\", \\\"FIXME\\\", \\\"...\\\", or \\\"Lorem ipsum\\\" with real, meaningful content appropriate to the context. NEVER keep placeholder text — always replace it. |\\n| `outside of a \\\\`\\\\`\\\\`mdma fenced block` | The YAML component is missing its fenced code block wrapper. Wrap it in ```mdma ... ```. Each component must be in its own separate fenced block. 
|\\n\\n## Table & Chart Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `Data key does not match any column` | Rename the data keys to match defined column keys, or add missing columns |\\n| `Column has no matching keys in any data row` | Either add matching data or remove the unused column |\\n| `xAxis does not match any CSV header` | Fix xAxis to reference an actual CSV column header |\\n| `yAxis does not match any CSV header` | Fix yAxis values to reference actual CSV column headers |\\n| `Chart data does not appear to be valid CSV` | Ensure CSV has a header row and at least one data row |\\n\\n## Approval & Webhook Fixes\\n\\n| Error | How to fix |\\n|-------|-----------|\\n| `field is likely a typo` on approval-gate | `roles` → `allowedRoles`, `approvers` → `requiredApprovers` |\\n| `trigger: Required` | Add a `trigger` field pointing to the component ID that should activate this webhook |\\n| `Cross-reference in trigger does not match` | Fix the trigger to reference an existing component ID |\\n| Missing `title` on approval-gate | Add a descriptive title |\\n| Missing `url` on webhook | Add a valid URL (e.g. `https://api.example.com/endpoint`) |\\n\\n## Examples\\n\\n### Example 1 — Broken action references\\n\\n**Issues reported:**\\n1. [error] cross-reference #order-form → onSubmit: \\\"submit-handler\\\" does not match any component ID\\n2. 
[error] schema-conformance #order-webhook → trigger: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: submit-handler\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: form\\nid: order-form\\nfields:\\n  - name: product\\n    type: text\\n    label: Product Name\\n    required: true\\nonSubmit: order-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: order-webhook\\nurl: https://api.example.com/orders\\nmethod: POST\\ntrigger: order-form\\n```\\n\\n```mdma\\ntype: callout\\nid: order-status\\nvariant: success\\ncontent: Your order has been submitted!\\n```\\n\\n---\\n\\n### Example 2 — Field name typos\\n\\n**Issues reported:**\\n1. [warning] field-name-typos #review-gate → \\\"roles\\\" is likely a typo — did you mean \\\"allowedRoles\\\"?\\n2. [warning] field-name-typos #review-gate → \\\"approvers\\\" is likely a typo — did you mean \\\"requiredApprovers\\\"?\\n3. 
[error] schema-conformance #submit-btn → text: Required\\n\\n**Broken document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nroles:\\n  - manager\\n  - hr\\napprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n**Fixed document:**\\n\\n```mdma\\ntype: approval-gate\\nid: review-gate\\ntitle: Manager Review\\nallowedRoles:\\n  - manager\\n  - hr\\nrequiredApprovers: 2\\nonApprove: confirmed\\n```\\n\\n```mdma\\ntype: callout\\nid: confirmed\\nvariant: success\\ncontent: Request approved!\\n```\\n\\n```mdma\\ntype: button\\nid: submit-btn\\ntext: Submit for Review\\nvariant: primary\\nonAction: review-gate\\n```\\n\\n---\\n\\n### Example 3 — Multi-step flow in single message (no conversation history)\\n\\n**Issues reported:**\\n1. [error] flow-ordering (document): Multi-step flow in single message — \\\"intake-form\\\" targets \\\"approval-gate\\\" via onSubmit\\n\\n**Broken document:**\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: approval-gate\\n```\\n\\n```mdma\\ntype: approval-gate\\nid: approval-gate\\ntitle: Manager Approval\\nrequiredApprovers: 1\\nonApprove: notify-webhook\\n```\\n\\n```mdma\\ntype: webhook\\nid: notify-webhook\\nurl: https://api.example.com/notify\\nmethod: POST\\ntrigger: approval-gate\\n```\\n\\n**Fixed document** (no prior conversation — output step 1 only):\\n\\n```mdma\\ntype: form\\nid: intake-form\\nfields:\\n  - name: reason\\n    type: textarea\\n    label: Reason\\nonSubmit: submitted-callout\\n```\\n\\n```mdma\\ntype: callout\\nid: submitted-callout\\nvariant: info\\ncontent: Your request has been submitted and is awaiting manager approval.\\n```\"},{\"role\":\"user\",\"content\":\"Fix the following MDMA document. 
The validator found 0 issue(s) that need to be fixed:\\n\\n\\n\\n---\\n\\n```mdma\\ntype: callout\\nid: project-summary\\nvariant: info\\ntitle: TBD\\ncontent: Lorem ipsum dolor sit amet\\n```\\n\"}]",
+          "label": "own-model/prompt-fixer.mjs",
+          "config": {}
+        },
+        "promptId": "e25ccd6780e971ee0cfbee787f70dd2ef7f01a62b8537dd686e8a67a7b6c024c",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: project-summary\nvariant: info\ntitle: Project Overview\ncontent: This document provides a high-level summary of the project's current status, goals, and key milestones.\n```",
+          "tokenUsage": {
+            "total": 7048,
+            "prompt": 6997,
+            "completion": 51,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 767,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "833",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:12 GMT",
+                "modal-function-call-id": "fc-01KWC8K0BSX8NAPS429V8SFSF5",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Fixes placeholder content in callout title and content",
+          "vars": {
+            "brokenDocument": "```mdma\ntype: callout\nid: project-summary\nvariant: info\ntitle: TBD\ncontent: Lorem ipsum dolor sit amet\n```\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-resolves-errors.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/fixer-preserves-components.mjs",
+              "config": {
+                "min": 1
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/no-placeholder-content.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 14,
+        "vars": {
+          "brokenDocument": "```mdma\ntype: callout\nid: project-summary\nvariant: info\ntitle: TBD\ncontent: Lorem ipsum dolor sit amet\n```\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "833",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:12 GMT",
+              "modal-function-call-id": "fc-01KWC8K0BSX8NAPS429V8SFSF5",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      }
+    ],
+    "stats": {
+      "successes": 15,
+      "failures": 0,
+      "errors": 0,
+      "tokenUsage": {
+        "prompt": 106601,
+        "completion": 1384,
+        "cached": 0,
+        "total": 107985,
+        "numRequests": 15,
+        "completionDetails": {
+          "reasoning": 0,
+          "acceptedPrediction": 0,
+          "rejectedPrediction": 0,
+          "cacheReadInputTokens": 0,
+          "cacheCreationInputTokens": 0
+        },
+        "assertions": {
+          "total": 0,
+          "prompt": 0,
+          "completion": 0,
+          "cached": 0,
+          "numRequests": 0,
+          "completionDetails": {
+            "reasoning": 0,
+            "acceptedPrediction": 0,
+            "rejectedPrediction": 0,
+            "cacheReadInputTokens": 0,
+            "cacheCreationInputTokens": 0
+          }
+        }
+      },
+      "durationMs": 19125,
+      "evaluationDurationMs": 19125
+    }
+  },
+  "config": {
+    "tags": {},
+    "description": "MDMA Fixer Eval (capability probe) — own model",
+    "prompts": [
+      "file:///Users/marcinsadowski/GIT/mr-mdma/evals/own-model/prompt-fixer.mjs"
+    ],
+    "providers": [
+      {
+        "id": "openai:chat:mdma-26b",
+        "config": {
+          "apiBaseUrl": "https://REDACTED.modal.run/v1",
+          "apiKey": "[REDACTED]",
+          "temperature": 1,
+          "max_tokens": 2048,
+          "min_p": 0.02,
+          "repetition_penalty": 1.1,
+          "chat_template_kwargs": {
+            "enable_thinking": false
+          }
+        }
+      }
+    ],
+    "tests": [
+      {
+        "description": "Fixes button missing required text field",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: button\nid: action-btn\nvariant: primary\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: button\nid: action-btn\nvariant: primary\n",
+              "hasFields": [
+                "text"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes callout missing required content field",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: callout\nid: status-notice\nvariant: info\ntitle: System Status\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: callout\nid: status-notice\nvariant: info\ntitle: System Status\n",
+              "hasFields": [
+                "content"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes select field missing required options array",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n  - name: message\n    type: textarea\n    label: Message\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: form\nid: contact-form\nfields:\n  - name: contact-type\n    type: select\n    label: Contact Type\n    required: true\n  - name: message\n    type: textarea\n    label: Message\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes placeholder title and content in callout",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: callout\nid: welcome-callout\nvariant: info\ntitle: TODO\ncontent: Lorem ipsum dolor sit amet\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/no-placeholder-content.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: callout\nid: welcome-callout\nvariant: info\n",
+              "hasFields": [
+                "title",
+                "content"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes email and phone fields missing sensitive flag",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n  - name: phone\n    type: text\n    label: Phone Number\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: form\nid: contact-details\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes button missing required text field (preserves opaque onAction label)",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: button\nid: refresh-btn\nvariant: primary\nonAction: refresh-stats\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: button\nid: refresh-btn\nonAction: refresh-stats\n",
+              "hasFields": [
+                "text"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes form missing required onSubmit field",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: profile-form\nfields:\n  - name: email\n    type: email\n    label: Email\n    required: true\n    sensitive: true\n  - name: display-name\n    type: text\n    label: Display Name\n  - name: bio\n    type: textarea\n    label: Bio\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: form\nid: profile-form\n",
+              "hasFields": [
+                "onSubmit"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes field name typos on approval-gate (roles→allowedRoles, approvers→requiredApprovers)",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: approval-gate\nid: leave-approval\ntitle: Manager Approval\nroles:\n  - manager\n  - hr\napprovers: 2\nonApprove: leave-confirmed\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: approval-gate\nid: leave-approval\ntitle: Manager Approval\nrequiredApprovers: 2\nallowedRoles:\n  - manager\n  - hr\nonApprove: leave-confirmed\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes table data key mismatch (data keys don't match column keys)",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product_name: Widget A\n    total_revenue: 50000\n    quantity: 120\n  - product_name: Widget B\n    total_revenue: 32000\n    quantity: 85\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "icontains",
+            "value": "product:"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: table\nid: sales-table\ncolumns:\n  - key: product\n    header: Product\n  - key: revenue\n    header: Revenue\n  - key: units\n    header: Units Sold\ndata:\n  - product: Widget A\n    revenue: 50000\n    units: 120\n  - product: Widget B\n    revenue: 32000\n    units: 85\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes chart axis mismatch (axes don't match data columns)",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: chart\nid: sales-chart\nvariant: bar\ndata: |\n  Month,Revenue,Costs\n  Jan,10000,8000\n  Feb,12000,9000\n  Mar,15000,11000\nxAxis: Date\nyAxis:\n  - Profit\n  - Expenses\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "icontains",
+            "value": "xAxis: Month"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: chart\nid: sales-chart\nvariant: bar\nxAxis: Month\nyAxis:\n  - Revenue\n  - Costs\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes missing PII sensitive flags on form fields",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n  - name: phone\n    type: text\n    label: Phone Number\n  - name: ssn\n    type: text\n    label: Social Security Number\n  - name: address\n    type: textarea\n    label: Home Address\nonSubmit: registration-complete\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: form\nid: patient-form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n  - name: ssn\n    type: text\n    label: Social Security Number\n    sensitive: true\n  - name: address\n    type: textarea\n    label: Home Address\n    sensitive: true\nonSubmit: registration-complete\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes missing PII sensitive flags on table columns",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n  - key: email\n    header: Email\n  - key: phone\n    header: Phone\n  - key: dob\n    header: Date of Birth\ndata:\n  - name: Jane Doe\n    email: jane@example.com\n    phone: 555-0101\n    dob: 1990-01-15\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: table\nid: patient-records\ncolumns:\n  - key: name\n    header: Patient Name\n    sensitive: true\n  - key: email\n    header: Email\n    sensitive: true\n  - key: phone\n    header: Phone\n    sensitive: true\n  - key: dob\n    header: Date of Birth\n    sensitive: true\n"
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes many issues on a single form (ID format, placeholder, PII, select)",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: employee_form\nfields:\n  - name: full_name\n    type: text\n    label: TODO\n    required: true\n  - name: email\n    type: email\n  - name: department\n    type: select\n    label: Department\n  - name: start_date\n    type: date\n    label: Start Date\nonSubmit: missing-handler\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/unique-kebab-ids.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/no-placeholder-content.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-contains-component.mjs",
+            "config": {
+              "expected": "type: form\nid: employee-form\n",
+              "hasFields": [
+                "onSubmit"
+              ]
+            }
+          }
+        ]
+      },
+      {
+        "description": "Fixes placeholder content in form field labels",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: form\nid: project-form\nfields:\n  - name: project-name\n    type: text\n    label: \"TODO: add label\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"...\"\n  - name: team-size\n    type: number\n    label: FIXME\nonSubmit: project-summary\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/no-placeholder-content.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Fixes placeholder content in callout title and content",
+        "vars": {
+          "brokenDocument": "```mdma\ntype: callout\nid: project-summary\nvariant: info\ntitle: TBD\ncontent: Lorem ipsum dolor sit amet\n```\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-resolves-errors.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/fixer-preserves-components.mjs",
+            "config": {
+              "min": 1
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/no-placeholder-content.mjs"
+          }
+        ]
+      }
+    ],
+    "scenarios": [],
+    "env": {},
+    "outputPath": [
+      "own-model/results-fixer.json"
+    ],
+    "extensions": [],
+    "metadata": {},
+    "evaluateOptions": {}
+  },
+  "shareableUrl": null,
+  "metadata": {
+    "promptfooVersion": "0.121.9",
+    "nodeVersion": "v22.22.0",
+    "platform": "darwin",
+    "arch": "arm64",
+    "exportedAt": "2026-06-30T12:38:13.905Z",
+    "evaluationCreatedAt": "2026-06-30T12:37:54.142Z"
+  }
+}
\ No newline at end of file
diff --git a/evals/own-model/results-flows.json b/evals/own-model/results-flows.json
new file mode 100644
index 0000000..19eef94
--- /dev/null
+++ b/evals/own-model/results-flows.json
@@ -0,0 +1,3302 @@
+{
+  "evalId": "eval-OA7-2026-06-30T12:35:53",
+  "results": {
+    "version": 3,
+    "timestamp": "2026-06-30T12:35:53.402Z",
+    "prompts": [
+      {
+        "raw": "function ({ vars }) {\n  const system = vars.customPrompt\n    ? `${AUTHORING_SYSTEM_PROMPT}\\n\\n---\\n\\n${vars.customPrompt}`\n    : AUTHORING_SYSTEM_PROMPT;\n  return [\n    { role: 'system', content: `{% raw %}${system}{% endraw %}` },\n    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` },\n  ];\n}",
+        "label": "own-model/prompt-custom.mjs",
+        "config": {},
+        "id": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "provider": "openai:chat:mdma-26b",
+        "metrics": {
+          "score": 15,
+          "testPassCount": 15,
+          "testFailCount": 0,
+          "testErrorCount": 0,
+          "assertPassCount": 63,
+          "assertFailCount": 0,
+          "totalLatencyMs": 64582,
+          "tokenUsage": {
+            "prompt": 23627,
+            "completion": 6114,
+            "cached": 0,
+            "total": 29741,
+            "numRequests": 15,
+            "completionDetails": {
+              "reasoning": 0,
+              "acceptedPrediction": 0,
+              "rejectedPrediction": 0,
+              "cacheReadInputTokens": 0,
+              "cacheCreationInputTokens": 0
+            },
+            "assertions": {
+              "total": 0,
+              "prompt": 0,
+              "completion": 0,
+              "cached": 0,
+              "numRequests": 0,
+              "completionDetails": {
+                "reasoning": 0,
+                "acceptedPrediction": 0,
+                "rejectedPrediction": 0,
+                "cacheReadInputTokens": 0,
+                "cacheCreationInputTokens": 0
+              }
+            }
+          },
+          "namedScores": {},
+          "namedScoresCount": {},
+          "namedScoreWeights": {},
+          "cost": 0
+        }
+      }
+    ],
+    "results": [
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "e0fcc46d-78eb-486b-827c-a383cbde66c8",
+        "latencyMs": 1967,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a website assistant. 
When a visitor wants to contact the company,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\nform#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\\n\"},{\"role\":\"user\",\"content\":\"I'd like to send a message to your support team.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Create a contact form with a required name, a required sensitive email, and a required message field; submits via contact-submitted.\n```\n\n```mdma\ntype: form\nid: contact-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: message\n    type: textarea\n    label: \"Message\"\n    required: true\nonSubmit: contact-submitted\n```",
+          "tokenUsage": {
+            "total": 1680,
+            "prompt": 1523,
+            "completion": 157,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1967,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1200",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:53 GMT",
+                "modal-function-call-id": "fc-01KWC8ERV44F6CJ9A6WGV6ED2X",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Contact Form flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a website assistant. When a visitor wants to contact the company,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\n",
+            "request": "I'd like to send a message to your support team."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 0,
+        "vars": {
+          "customPrompt": "You are a website assistant. When a visitor wants to contact the company,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\n",
+          "request": "I'd like to send a message to your support team."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1200",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:53 GMT",
+              "modal-function-call-id": "fc-01KWC8ERV44F6CJ9A6WGV6ED2X",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 7 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "85eb74b3-89af-4bd4-bdbc-ea6881b40291",
+        "latencyMs": 4303,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an engineering support assistant. 
When a user reports a bug,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#pagerduty-reminder(variant=warning)\\nform#bug-report-form[title*:t, reporter-email*^:e, severity*:s{P0|P1|P2|P3}, affected-component*:s{frontend|backend|database|auth|payments}, steps-to-reproduce*:ta, expected-behavior*:ta, actual-behavior*:ta](action=triage-bug-report)\\n\"},{\"role\":\"user\",\"content\":\"The checkout page is showing a 500 error for all users in production.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  The user needs a way to report critical errors. I will first provide a warning callout to remind them to check PagerDuty, followed by a multi-field form to capture the error details and route it through the `triage-bug-report` action.\n```\n\n```mdma\ntype: callout\nid: pagerduty-reminder\nvariant: warning\ncontent: \"Critical production error detected. Please ensure you have paged the on-call engineer before proceeding with the formal bug report.\"\n```\n\n```mdma\ntype: form\nid: bug-report-form\nfields:\n  - name: title\n    type: text\n    label: \"Error Title\"\n    required: true\n  - name: reporter-email\n    type: email\n    label: \"Reporter Email\"\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: \"Severity Level\"\n    required: true\n    options:\n      - { label: \"P0 - Critical\", value: \"P0\" }\n      - { label: \"P1 - High\", value: \"P1\" }\n      - { label: \"P2 - Medium\", value: \"P2\" }\n      - { label: \"P3 - Low\", value: \"P3\" }\n  - name: affected-component\n    type: select\n    label: \"Affected Component\"\n    required: true\n    options:\n      - { label: \"Frontend\", value: \"frontend\" }\n      - { label: \"Backend\", value: \"backend\" }\n      - { label: \"Database\", value: \"database\" }\n      - { label: \"Authentication\", value: \"auth\" }\n      - { label: \"Payments\", value: \"payments\" }\n  - name: steps-to-reproduce\n    type: textarea\n    label: \"Steps to Reproduce\"\n    required: true\n  - name: expected-behavior\n    type: textarea\n    label: \"Expected Behavior\"\n    required: true\n  - name: actual-behavior\n    type: textarea\n    label: \"Actual Behavior\"\n    required: true\nonSubmit: triage-bug-report\n```",
+          "tokenUsage": {
+            "total": 2102,
+            "prompt": 1593,
+            "completion": 509,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4303,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2455",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:55 GMT",
+                "modal-function-call-id": "fc-01KWC8ETKTKHB986B2KV6NN6PA",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Bug Report flow (DSL)",
+          "vars": {
+            "customPrompt": "You are an engineering support assistant. When a user reports a bug,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#pagerduty-reminder(variant=warning)\nform#bug-report-form[title*:t, reporter-email*^:e, severity*:s{P0|P1|P2|P3}, affected-component*:s{frontend|backend|database|auth|payments}, steps-to-reproduce*:ta, expected-behavior*:ta, actual-behavior*:ta](action=triage-bug-report)\n",
+            "request": "The checkout page is showing a 500 error for all users in production."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 1,
+        "vars": {
+          "customPrompt": "You are an engineering support assistant. When a user reports a bug,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#pagerduty-reminder(variant=warning)\nform#bug-report-form[title*:t, reporter-email*^:e, severity*:s{P0|P1|P2|P3}, affected-component*:s{frontend|backend|database|auth|payments}, steps-to-reproduce*:ta, expected-behavior*:ta, actual-behavior*:ta](action=triage-bug-report)\n",
+          "request": "The checkout page is showing a 500 error for all users in production."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2455",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:55 GMT",
+              "modal-function-call-id": "fc-01KWC8ETKTKHB986B2KV6NN6PA",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            }
+          ]
+        },
+        "id": "f715deef-4b70-4933-a7db-696bc3156b6e",
+        "latencyMs": 2155,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an HR onboarding assistant. 
For the new hire's first-week setup,\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\nform#personal-info-form[full-name*:t, preferred-name:t, personal-email*^:e, phone^:t, emergency-contact*^:t](action=review-onboarding-info)\\n\"},{\"role\":\"user\",\"content\":\"Hi, I'm starting next Monday in the engineering team!\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  A required personal information form for onboarding. Includes PII fields for full name, personal email, phone, and emergency contact. Submits via review-onboarding-info.\n```\n\n```mdma\ntype: form\nid: personal-info-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: preferred-name\n    type: text\n    label: \"Preferred Name\"\n  - name: personal-email\n    type: email\n    label: \"Personal Email\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: emergency-contact\n    type: text\n    label: \"Emergency Contact Information\"\n    required: true\n    sensitive: true\nonSubmit: review-onboarding-info\n```",
+          "tokenUsage": {
+            "total": 1776,
+            "prompt": 1548,
+            "completion": 228,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2155,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1471",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:59 GMT",
+                "modal-function-call-id": "fc-01KWC8EYVEEKMWPT6B2DSJS6Q3",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Employee Onboarding flow (DSL)",
+          "vars": {
+            "customPrompt": "You are an HR onboarding assistant. For the new hire's first-week setup,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#personal-info-form[full-name*:t, preferred-name:t, personal-email*^:e, phone^:t, emergency-contact*^:t](action=review-onboarding-info)\n",
+            "request": "Hi, I'm starting next Monday in the engineering team!"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 1
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 2,
+        "vars": {
+          "customPrompt": "You are an HR onboarding assistant. For the new hire's first-week setup,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#personal-info-form[full-name*:t, preferred-name:t, personal-email*^:e, phone^:t, emergency-contact*^:t](action=review-onboarding-info)\n",
+          "request": "Hi, I'm starting next Monday in the engineering team!"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1471",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:59 GMT",
+              "modal-function-call-id": "fc-01KWC8EYVEEKMWPT6B2DSJS6Q3",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 5 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "bd39089e-c69a-4c54-b0ad-6fc5768f84b7",
+        "latencyMs": 3157,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a finance assistant (requests over $5,000 need director approval).\\nGenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#budget-approval-warning(variant=warning)\\nform#budget-request-form[requester-name*:t, requester-email*^:e, 
department*:s{engineering|marketing|operations|finance}, requested-amount*:n, business-justification*:ta](action=route-budget-request)\\n\"},{\"role\":\"user\",\"content\":\"I need to request $12,000 for new development servers.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating a warning callout for budget thresholds, followed by a request form with department selection and justification requirements.\n```\n\n```mdma\ntype: callout\nid: budget-approval-warning\nvariant: warning\ncontent: \"Budget requests exceeding $5,000 require automatic director approval before processing.\"\n```\n\n```mdma\ntype: form\nid: budget-request-form\nfields:\n  - name: requester-name\n    type: text\n    label: \"Requester Name\"\n    required: true\n  - name: requester-email\n    type: email\n    label: \"Requester Email\"\n    required: true\n    sensitive: true\n  - name: department\n    type: select\n    label: \"Department\"\n    required: true\n    options:\n      - label: \"Engineering\"\n        value: \"engineering\"\n      - label: \"Marketing\"\n        value: \"marketing\"\n      - label: \"Operations\"\n        value: \"operations\"\n      - label: \"Finance\"\n        value: \"finance\"\n  - name: requested-amount\n    type: number\n    label: \"Requested Amount ($)\"\n    required: true\n  - name: business-justification\n    type: textarea\n    label: \"Business Justification\"\n    required: true\nonSubmit: route-budget-request\n```",
+          "tokenUsage": {
+            "total": 1893,
+            "prompt": 1570,
+            "completion": 323,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3157,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1898",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:02 GMT",
+                "modal-function-call-id": "fc-01KWC8F0ZQG7K67TZGMN8Q2W38",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Budget Approval flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a finance assistant (requests over $5,000 need director approval).\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#budget-approval-warning(variant=warning)\nform#budget-request-form[requester-name*:t, requester-email*^:e, department*:s{engineering|marketing|operations|finance}, requested-amount*:n, business-justification*:ta](action=route-budget-request)\n",
+            "request": "I need to request $12,000 for new development servers."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 3,
+        "vars": {
+          "customPrompt": "You are a finance assistant (requests over $5,000 need director approval).\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#budget-approval-warning(variant=warning)\nform#budget-request-form[requester-name*:t, requester-email*^:e, department*:s{engineering|marketing|operations|finance}, requested-amount*:n, business-justification*:ta](action=route-budget-request)\n",
+          "request": "I need to request $12,000 for new development servers."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1898",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:02 GMT",
+              "modal-function-call-id": "fc-01KWC8F0ZQG7K67TZGMN8Q2W38",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 1 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 3 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 1
+                }
+              }
+            }
+          ]
+        },
+        "id": "c1c05bab-6de5-4d92-84ba-bc4c2ba9d9c2",
+        "latencyMs": 3981,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a customer experience assistant collecting post-support feedback.\\nGenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\nform#support-survey-form[support-ticket-id*:t, overall-satisfaction*:s{1|2|3|4|5}, response-time-rating*:s{1|2|3|4|5}, issue-resolved:c, additional-comments:ta, 
email^:e](action=survey-submitted)\\n\"},{\"role\":\"user\",\"content\":\"I just finished a support call and want to leave feedback.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Support survey form: captures the ticket ID, satisfaction rating, speed rating, resolution status, extra comments, and sensitive email; submits via survey-submitted.\n```\n\n```mdma\ntype: form\nid: support-survey-form\nfields:\n  - name: support-ticket-id\n    type: text\n    label: \"Support Ticket ID\"\n    required: true\n  - name: overall-satisfaction\n    type: select\n    label: \"Overall Satisfaction\"\n    required: true\n    options:\n      - { label: \"1\", value: \"1\" }\n      - { label: \"2\", value: \"2\" }\n      - { label: \"3\", value: \"3\" }\n      - { label: \"4\", value: \"4\" }\n      - { label: \"5\", value: \"5\" }\n  - name: response-time-rating\n    type: select\n    label: \"Response Time Rating\"\n    required: true\n    options:\n      - { label: \"1\", value: \"1\" }\n      - { label: \"2\", value: \"2\" }\n      - { label: \"3\", value: \"3\" }\n      - { label: \"4\", value: \"4\" }\n      - { label: \"5\", value: \"5\" }\n  - name: issue-resolved\n    type: checkbox\n    label: \"Was your issue resolved?\"\n  - name: additional-comments\n    type: textarea\n    label: \"Additional Comments\"\n  - name: email\n    type: email\n    label: \"Email Address\"\n    sensitive: true\nonSubmit: survey-submitted\n```",
+          "tokenUsage": {
+            "total": 1962,
+            "prompt": 1564,
+            "completion": 398,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3981,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1976",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:05 GMT",
+                "modal-function-call-id": "fc-01KWC8F43HM84VF4K8YZXHEDZQ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Customer Survey flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a customer experience assistant collecting post-support feedback.\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#support-survey-form[support-ticket-id*:t, overall-satisfaction*:s{1|2|3|4|5}, response-time-rating*:s{1|2|3|4|5}, issue-resolved:c, additional-comments:ta, email^:e](action=survey-submitted)\n",
+            "request": "I just finished a support call and want to leave feedback."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 1
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 4,
+        "vars": {
+          "customPrompt": "You are a customer experience assistant collecting post-support feedback.\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#support-survey-form[support-ticket-id*:t, overall-satisfaction*:s{1|2|3|4|5}, response-time-rating*:s{1|2|3|4|5}, issue-resolved:c, additional-comments:ta, email^:e](action=survey-submitted)\n",
+          "request": "I just finished a support call and want to leave feedback."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1976",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:05 GMT",
+              "modal-function-call-id": "fc-01KWC8F43HM84VF4K8YZXHEDZQ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, chart, chart",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "chart",
+                    "callout",
+                    "table"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 components (min: 2)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/component-count.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "661b4821-dc8d-4955-91d5-dcc07784ea1c",
+        "latencyMs": 3039,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a sales analytics assistant. 
For the Q1 overview (total $2.34M,\\n111% of target; Jan $720K, Feb $780K, Mar $840K; by product Platform $980K,\\nAnalytics $640K, Integrations $420K, Services $300K), generate the MDMA\\ndocument for exactly this DSL intent and nothing else (use that data):\\n\\ncallout#q1-summary(variant=info)\\nchart#revenue-trend(variant=line)\\nchart#product-breakdown(variant=bar)\\n\"},{\"role\":\"user\",\"content\":\"Give me the Q1 sales overview with charts.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating a Q1 sales performance dashboard including an info callout for KPIs and two charts for revenue trend and product breakdown.\n```\n\n```mdma\ntype: callout\nid: q1-summary\nvariant: info\ncontent: |\n  **Q1 Sales Overview**\n  \n  - **Total Revenue:** $2.34M\n  - **Performance:** 111% of target achieved.\n```\n\n```mdma\ntype: chart\nid: revenue-trend\nvariant: line\nlabel: \"Monthly Revenue Trend\"\ndata: |\n  Month, Revenue\n  Jan, 720000\n  Feb, 780000\n  Mar, 840000\nxAxis: Month\n```\n\n```mdma\ntype: chart\nid: product-breakdown\nvariant: bar\nlabel: \"Revenue by Product\"\ndata: |\n  Product, Revenue\n  Platform, 980000\n  Analytics, 640000\n  Integrations, 420000\n  Services, 300000\nxAxis: Product\n```",
+          "tokenUsage": {
+            "total": 1875,
+            "prompt": 1598,
+            "completion": 277,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3039,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1444",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:09 GMT",
+                "modal-function-call-id": "fc-01KWC8F81RHWY876T8Q7J2FCVW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Sales Dashboard flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a sales analytics assistant. For the Q1 overview (total $2.34M,\n111% of target; Jan $720K, Feb $780K, Mar $840K; by product Platform $980K,\nAnalytics $640K, Integrations $420K, Services $300K), generate the MDMA\ndocument for exactly this DSL intent and nothing else (use that data):\n\ncallout#q1-summary(variant=info)\nchart#revenue-trend(variant=line)\nchart#product-breakdown(variant=bar)\n",
+            "request": "Give me the Q1 sales overview with charts."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "chart",
+                  "callout",
+                  "table"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/component-count.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 5,
+        "vars": {
+          "customPrompt": "You are a sales analytics assistant. For the Q1 overview (total $2.34M,\n111% of target; Jan $720K, Feb $780K, Mar $840K; by product Platform $980K,\nAnalytics $640K, Integrations $420K, Services $300K), generate the MDMA\ndocument for exactly this DSL intent and nothing else (use that data):\n\ncallout#q1-summary(variant=info)\nchart#revenue-trend(variant=line)\nchart#product-breakdown(variant=bar)\n",
+          "request": "Give me the Q1 sales overview with charts."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1444",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:09 GMT",
+              "modal-function-call-id": "fc-01KWC8F81RHWY876T8Q7J2FCVW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            }
+          ]
+        },
+        "id": "59f18b0a-0eb0-4ccf-90cd-185e84af7766",
+        "latencyMs": 3207,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an incident response assistant. 
Generate the MDMA document for\\nexactly this DSL intent and nothing else:\\n\\nform#incident-intake-form[incident-title*:t, reporter-email*^:e, severity*:s{P1|P2|P3|P4}, affected-systems*:t, description*:ta](action=triage-incident)\\n\"},{\"role\":\"user\",\"content\":\"Our payment processing service is completely down in production.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Incident intake form: captures title, sensitive reporter email, severity (P1-P4), affected systems, and description; submits via triage-incident.\n```\n\n```mdma\ntype: form\nid: incident-intake-form\nfields:\n  - name: incident-title\n    type: text\n    label: \"Incident Title\"\n    required: true\n  - name: reporter-email\n    type: email\n    label: \"Reporter Email\"\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - label: \"P1 - Critical\"\n        value: \"P1\"\n      - label: \"P2 - High\"\n        value: \"P2\"\n      - label: \"P3 - Medium\"\n        value: \"P3\"\n      - label: \"P4 - Low\"\n        value: \"P4\"\n  - name: affected-systems\n    type: text\n    label: \"Affected Systems\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"Description\"\n    required: true\nonSubmit: triage-incident\n```",
+          "tokenUsage": {
+            "total": 1837,
+            "prompt": 1540,
+            "completion": 297,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3207,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1666",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:12 GMT",
+                "modal-function-call-id": "fc-01KWC8FB2NYVENK21EP745TNA2",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Incident Triage flow (DSL)",
+          "vars": {
+            "customPrompt": "You are an incident response assistant. Generate the MDMA document for\nexactly this DSL intent and nothing else:\n\nform#incident-intake-form[incident-title*:t, reporter-email*^:e, severity*:s{P1|P2|P3|P4}, affected-systems*:t, description*:ta](action=triage-incident)\n",
+            "request": "Our payment processing service is completely down in production."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 6,
+        "vars": {
+          "customPrompt": "You are an incident response assistant. Generate the MDMA document for\nexactly this DSL intent and nothing else:\n\nform#incident-intake-form[incident-title*:t, reporter-email*^:e, severity*:s{P1|P2|P3|P4}, affected-systems*:t, description*:ta](action=triage-incident)\n",
+          "request": "Our payment processing service is completely down in production."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1666",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:12 GMT",
+              "modal-function-call-id": "fc-01KWC8FB2NYVENK21EP745TNA2",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 6 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "17c62b14-fc60-4cfc-b566-5d9ece25c10e",
+        "latencyMs": 4577,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a product operations assistant (5-day review process). 
Generate the\\nMDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#review-process(variant=info)\\nform#feature-request-form[feature-title*:t, requester-name*:t, requesting-team*:s{engineering|sales|cs|marketing|product}, priority*:s{critical|high|medium|low}, description*:ta, primary-use-case*:ta](action=evaluate-feature-request)\\n\"},{\"role\":\"user\",\"content\":\"We need a bulk export feature for our enterprise customers.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Providing a form for feature requests with a title, requester, team selection, priority level, and multi-line text for use-case details.\n```\n\n```mdma\ntype: callout\nid: review-process\nvariant: info\ncontent: |\n  Feature requests undergo a 5-day review process by the product team before being scheduled for evaluation.\n```\n\n```mdma\ntype: form\nid: feature-request-form\nfields:\n  - name: feature-title\n    type: text\n    label: \"Feature Title\"\n    required: true\n  - name: requester-name\n    type: text\n    label: \"Requester Name\"\n    required: true\n  - name: requesting-team\n    type: select\n    label: \"Requesting Team\"\n    required: true\n    options:\n      - { label: \"Engineering\", value: \"engineering\" }\n      - { label: \"Sales\", value: \"sales\" }\n      - { label: \"CS\", value: \"cs\" }\n      - { label: \"Marketing\", value: \"marketing\" }\n      - { label: \"Product\", value: \"product\" }\n  - name: priority\n    type: select\n    label: \"Priority\"\n    required: true\n    options:\n      - { label: \"Critical\", value: \"critical\" }\n      - { label: \"High\", value: \"high\" }\n      - { label: \"Medium\", value: \"medium\" }\n      - { label: \"Low\", value: \"low\" }\n  - name: description\n    type: textarea\n    label: \"Description\"\n    required: true\n  - name: primary-use-case\n    type: textarea\n    label: \"Primary Use Case\"\n    required: true\nonSubmit: evaluate-feature-request\n```",
+          "tokenUsage": {
+            "total": 2003,
+            "prompt": 1571,
+            "completion": 432,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4577,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2180",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:15 GMT",
+                "modal-function-call-id": "fc-01KWC8FE80SXNX72VKEQ49NBE5",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Feature Request flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a product operations assistant (5-day review process). Generate the\nMDMA document for exactly this DSL intent and nothing else:\n\ncallout#review-process(variant=info)\nform#feature-request-form[feature-title*:t, requester-name*:t, requesting-team*:s{engineering|sales|cs|marketing|product}, priority*:s{critical|high|medium|low}, description*:ta, primary-use-case*:ta](action=evaluate-feature-request)\n",
+            "request": "We need a bulk export feature for our enterprise customers."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 7,
+        "vars": {
+          "customPrompt": "You are a product operations assistant (5-day review process). Generate the\nMDMA document for exactly this DSL intent and nothing else:\n\ncallout#review-process(variant=info)\nform#feature-request-form[feature-title*:t, requester-name*:t, requesting-team*:s{engineering|sales|cs|marketing|product}, priority*:s{critical|high|medium|low}, description*:ta, primary-use-case*:ta](action=evaluate-feature-request)\n",
+          "request": "We need a bulk export feature for our enterprise customers."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2180",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:15 GMT",
+              "modal-function-call-id": "fc-01KWC8FE80SXNX72VKEQ49NBE5",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, chart, chart",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "chart",
+                    "callout",
+                    "form"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 components (min: 2)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/component-count.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "d38db450-b586-47cc-9d57-eb4687af1e69",
+        "latencyMs": 3595,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a scrum master assistant. 
For Sprint 24 (velocity 42 of 45 target;\\nhistory 38,44,41,43,47; bugs Auth 5, API 3, Frontend 7, DB 2, CI/CD 1),\\ngenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#sprint-info(variant=info)\\nchart#velocity-trend(variant=line)\\nchart#bug-distribution(variant=bar)\\n\"},{\"role\":\"user\",\"content\":\"Let's start the Sprint 24 retrospective.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating Sprint 24 overview:\n  - info callout providing sprint context (velocity, bug count).\n  - line chart for velocity history (38, 44, 41, 43, 47).\n  - bar chart for bug distribution across Auth, API, Frontend, DB, and CI/CD.\n```\n\n```mdma\ntype: callout\nid: sprint-info\nvariant: info\ncontent: |\n  # Sprint 24 Overview\n  The team completed the sprint with a velocity of 42, falling slightly below the 45 target. Quality efforts focused on frontend stability, as seen in the bug distribution.\n```\n\n```mdma\ntype: chart\nid: velocity-trend\nvariant: line\nlabel: \"Sprint Velocity History\"\ndata: |\n  Sprint, Velocity\n  S20, 38\n  S21, 44\n  S22, 41\n  S23, 43\n  S24, 42\nxAxis: Sprint\n```\n\n```mdma\ntype: chart\nid: bug-distribution\nvariant: bar\nlabel: \"Bugs by Component\"\ndata: |\n  Component, Count\n  Auth, 5\n  API, 3\n  Frontend, 7\n  DB, 2\n  CI/CD, 1\nxAxis: Component\n```",
+          "tokenUsage": {
+            "total": 1907,
+            "prompt": 1576,
+            "completion": 331,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3595,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1624",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:20 GMT",
+                "modal-function-call-id": "fc-01KWC8FJQT0992R9N9BMV30CGQ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Sprint Retro flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a scrum master assistant. For Sprint 24 (velocity 42 of 45 target;\nhistory 38,44,41,43,47; bugs Auth 5, API 3, Frontend 7, DB 2, CI/CD 1),\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#sprint-info(variant=info)\nchart#velocity-trend(variant=line)\nchart#bug-distribution(variant=bar)\n",
+            "request": "Let's start the Sprint 24 retrospective."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "chart",
+                  "callout",
+                  "form"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/component-count.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 8,
+        "vars": {
+          "customPrompt": "You are a scrum master assistant. For Sprint 24 (velocity 42 of 45 target;\nhistory 38,44,41,43,47; bugs Auth 5, API 3, Frontend 7, DB 2, CI/CD 1),\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#sprint-info(variant=info)\nchart#velocity-trend(variant=line)\nchart#bug-distribution(variant=bar)\n",
+          "request": "Let's start the Sprint 24 retrospective."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1624",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:20 GMT",
+              "modal-function-call-id": "fc-01KWC8FJQT0992R9N9BMV30CGQ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, table, tasklist",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "table",
+                    "callout",
+                    "tasklist",
+                    "chart",
+                    "button"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 components (min: 2)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/component-count.mjs",
+                "config": {
+                  "min": 2
+                }
+              }
+            }
+          ]
+        },
+        "id": "44bc6668-2ca5-4a58-8629-8e30fd1d8ef6",
+        "latencyMs": 4530,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an e-commerce customer service assistant. 
For order ORD-2024-78542\\n(in transit, ETA Mar 22; items Wireless Keyboard, USB-C Hub x2, Monitor\\nStand, Cable Kit), generate the MDMA document for exactly this DSL intent\\nand nothing else:\\n\\ncallout#order-status(variant=info)\\ntable#order-items\\ntasklist#delivery-milestones\\n\"},{\"role\":\"user\",\"content\":\"Where is my order ORD-2024-78542?\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating tracking dashboard for ORD-2024-78542: an information callout for status, a table for items, and a tasklist for delivery milestones.\n```\n\n```mdma\ntype: callout\nid: order-status\nvariant: info\ncontent: |\n  Your order **ORD-2024-78542** is currently **In Transit**. It is estimated to arrive by **March 22, 2024**.\n```\n\n```mdma\ntype: table\nid: order-items\ncolumns:\n  - key: item\n    header: \"Item Description\"\n  - key: qty\n    header: \"Quantity\"\n  - key: price\n    header: \"Price\"\ndata:\n  - { item: \"Wireless Keyboard\", qty: 1, price: 45.00 }\n  - { item: \"USB-C Hub\", qty: 2, price: 35.00 }\n  - { item: \"Monitor Stand\", qty: 1, price: 29.99 }\n  - { item: \"Cable Kit\", qty: 1, price: 15.00 }\n```\n\n```mdma\ntype: tasklist\nid: delivery-milestones\nitems:\n  - id: order-placed\n    text: \"Order placed and confirmed\"\n  - id: payment-processed\n    text: \"Payment processed successfully\"\n  - id: warehouse-picked\n    text: \"Items picked and packed at warehouse\"\n  - id: shipped\n    text: \"Order shipped from distribution center\"\n  - id: out-for-delivery\n    text: \"Out for delivery\"\n  - id: delivered\n    text: \"Delivered to destination\"\n```",
+          "tokenUsage": {
+            "total": 1986,
+            "prompt": 1566,
+            "completion": 420,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4530,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1924",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:23 GMT",
+                "modal-function-call-id": "fc-01KWC8FP9DDN4CTMSXC3A7SHGF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Order Tracking flow (DSL)",
+          "vars": {
+            "customPrompt": "You are an e-commerce customer service assistant. For order ORD-2024-78542\n(in transit, ETA Mar 22; items Wireless Keyboard, USB-C Hub x2, Monitor\nStand, Cable Kit), generate the MDMA document for exactly this DSL intent\nand nothing else:\n\ncallout#order-status(variant=info)\ntable#order-items\ntasklist#delivery-milestones\n",
+            "request": "Where is my order ORD-2024-78542?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "table",
+                  "callout",
+                  "tasklist",
+                  "chart",
+                  "button"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/component-count.mjs",
+              "config": {
+                "min": 2
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 9,
+        "vars": {
+          "customPrompt": "You are an e-commerce customer service assistant. For order ORD-2024-78542\n(in transit, ETA Mar 22; items Wireless Keyboard, USB-C Hub x2, Monitor\nStand, Cable Kit), generate the MDMA document for exactly this DSL intent\nand nothing else:\n\ncallout#order-status(variant=info)\ntable#order-items\ntasklist#delivery-milestones\n",
+          "request": "Where is my order ORD-2024-78542?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1924",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:23 GMT",
+              "modal-function-call-id": "fc-01KWC8FP9DDN4CTMSXC3A7SHGF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Callout with variant: warning found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/callout-variant.mjs",
+                "config": {
+                  "variant": "warning"
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 8 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "ec00955f-ea09-4dc3-b383-b95f8823c7de",
+        "latencyMs": 5646,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a release management assistant (SOX/ISO compliance). 
Generate the\\nMDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#risk-assessment-warning(variant=warning)\\nform#change-request-form[change-request-id*:t, jira-ticket*:t, change-title*:t, change-type*:s{standard|normal|emergency}, target-environment*:s{production|staging|pre-prod}, risk-level*:s{low|medium|high|critical}, change-description*:ta, rollback-plan*:ta](action=review-change-request)\\n\"},{\"role\":\"user\",\"content\":\"I need to deploy a database migration to production this weekend.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating a risk assessment warning and a change request form for the database migration, including field validation for risk and environment, submitting to the review action.\n```\n\n```mdma\ntype: callout\nid: risk-assessment-warning\nvariant: warning\ncontent: \"Critical: Ensure you have verified your rollback plan and backup status before submitting this change request. High-impact migrations require strict adherence to the SOX/ISO deployment window.\"\n```\n\n```mdma\ntype: form\nid: change-request-form\nfields:\n  - name: change-request-id\n    type: text\n    label: \"Change Request ID\"\n    required: true\n  - name: jira-ticket\n    type: text\n    label: \"Jira Ticket\"\n    required: true\n  - name: change-title\n    type: text\n    label: \"Change Title\"\n    required: true\n  - name: change-type\n    type: select\n    label: \"Change Type\"\n    required: true\n    options:\n      - label: \"Standard\"\n        value: \"standard\"\n      - label: \"Normal\"\n        value: \"normal\"\n      - label: \"Emergency\"\n        value: \"emergency\"\n  - name: target-environment\n    type: select\n    label: \"Target Environment\"\n    required: true\n    options:\n      - label: \"Production\"\n        value: \"production\"\n      - label: \"Staging\"\n        value: \"staging\"\n      - label: \"Pre-Prod\"\n        value: \"pre-prod\"\n  - name: risk-level\n    type: select\n    label: \"Risk Level\"\n    required: true\n    options:\n      - label: \"Low\"\n        value: \"low\"\n      - label: \"Medium\"\n        value: \"medium\"\n      - label: \"High\"\n        value: \"high\"\n      - label: \"Critical\"\n        value: \"critical\"\n  - name: change-description\n    type: textarea\n    label: \"Change Description\"\n    required: true\n  - name: rollback-plan\n    type: textarea\n    label: \"Rollback Plan\"\n    required: true\nonSubmit: 
review-change-request\n```",
+          "tokenUsage": {
+            "total": 2122,
+            "prompt": 1594,
+            "completion": 528,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 5646,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2629",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:28 GMT",
+                "modal-function-call-id": "fc-01KWC8FTQVVXP24J66V3NFG799",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Change Management flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a release management assistant (SOX/ISO compliance). Generate the\nMDMA document for exactly this DSL intent and nothing else:\n\ncallout#risk-assessment-warning(variant=warning)\nform#change-request-form[change-request-id*:t, jira-ticket*:t, change-title*:t, change-type*:s{standard|normal|emergency}, target-environment*:s{production|staging|pre-prod}, risk-level*:s{low|medium|high|critical}, change-description*:ta, rollback-plan*:ta](action=review-change-request)\n",
+            "request": "I need to deploy a database migration to production this weekend."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/callout-variant.mjs",
+              "config": {
+                "variant": "warning"
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 10,
+        "vars": {
+          "customPrompt": "You are a release management assistant (SOX/ISO compliance). Generate the\nMDMA document for exactly this DSL intent and nothing else:\n\ncallout#risk-assessment-warning(variant=warning)\nform#change-request-form[change-request-id*:t, jira-ticket*:t, change-title*:t, change-type*:s{standard|normal|emergency}, target-environment*:s{production|staging|pre-prod}, risk-level*:s{low|medium|high|critical}, change-description*:ta, rollback-plan*:ta](action=review-change-request)\n",
+          "request": "I need to deploy a database migration to production this weekend."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2629",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:28 GMT",
+              "modal-function-call-id": "fc-01KWC8FTQVVXP24J66V3NFG799",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found sensitive: true flag",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 8 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "97119c57-0738-4cef-9ca1-9026cc9cddf4",
+        "latencyMs": 6606,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a customer support escalation assistant with SLA tracking. 
Generate\\nthe MDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#sla-compliance(variant=warning)\\nform#escalation-intake-form[case-id*:t, customer-name*:t, customer-email*^:e, account-id*^:t, priority*:s{P1|P2|P3|P4}, category*:s{billing|technical|account|other}, customer-sentiment*:s{angry|frustrated|neutral|satisfied}, escalation-reason*:ta](action=route-escalation)\\n\"},{\"role\":\"user\",\"content\":\"A major enterprise customer is threatening to cancel over billing errors.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Building an escalation intake form for critical billing issues. \n  Components: a warning callout regarding SLA compliance and a form for triage/routing.\n  Fields: Case ID, Name, Email (PII), Account ID (PII), Priority (Select), Category (Select), Sentiment (Select), and Reason (Textarea).\n  Action: route-escalation.\n```\n\n```mdma\ntype: callout\nid: sla-compliance\nvariant: warning\ncontent: \"Critical Alert: Escalations involving billing errors for tier-one accounts require resolution within 4 business hours to maintain SLA compliance.\"\n```\n\n```mdma\ntype: form\nid: escalation-intake-form\nfields:\n  - name: case-id\n    type: text\n    label: \"Case ID\"\n    required: true\n  - name: customer-name\n    type: text\n    label: \"Customer Name\"\n    required: true\n  - name: customer-email\n    type: email\n    label: \"Customer Email\"\n    required: true\n    sensitive: true\n  - name: account-id\n    type: text\n    label: \"Account ID\"\n    required: true\n    sensitive: true\n  - name: priority\n    type: select\n    label: \"Priority\"\n    required: true\n    options:\n      - { label: \"P1 - Critical\", value: \"P1\" }\n      - { label: \"P2 - High\", value: \"P2\" }\n      - { label: \"P3 - Medium\", value: \"P3\" }\n      - { label: \"P4 - Low\", value: \"P4\" }\n  - name: category\n    type: select\n    label: \"Category\"\n    required: true\n    options:\n      - { label: \"Billing\", value: \"billing\" }\n      - { label: \"Technical\", value: \"technical\" }\n      - { label: \"Account\", value: \"account\" }\n      - { label: \"Other\", value: \"other\" }\n  - name: customer-sentiment\n    type: select\n    label: \"Customer Sentiment\"\n    required: true\n    options:\n      - { label: \"Angry\", value: \"angry\" }\n      - { label: \"Frustrated\", value: \"frustrated\" }\n      - { label: \"Neutral\", value: \"neutral\" }\n      - 
{ label: \"Satisfied\", value: \"satisfied\" }\n  - name: escalation-reason\n    type: textarea\n    label: \"Escalation Reason\"\n    required: true\nonSubmit: route-escalation\n```",
+          "tokenUsage": {
+            "total": 2206,
+            "prompt": 1596,
+            "completion": 610,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 6606,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2791",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:33 GMT",
+                "modal-function-call-id": "fc-01KWC8G099QBXKEYMFS5V6MF9V",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Customer Escalation flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a customer support escalation assistant with SLA tracking. Generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\ncallout#sla-compliance(variant=warning)\nform#escalation-intake-form[case-id*:t, customer-name*:t, customer-email*^:e, account-id*^:t, priority*:s{P1|P2|P3|P4}, category*:s{billing|technical|account|other}, customer-sentiment*:s{angry|frustrated|neutral|satisfied}, escalation-reason*:ta](action=route-escalation)\n",
+            "request": "A major enterprise customer is threatening to cancel over billing errors."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 11,
+        "vars": {
+          "customPrompt": "You are a customer support escalation assistant with SLA tracking. Generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\ncallout#sla-compliance(variant=warning)\nform#escalation-intake-form[case-id*:t, customer-name*:t, customer-email*^:e, account-id*^:t, priority*:s{P1|P2|P3|P4}, category*:s{billing|technical|account|other}, customer-sentiment*:s{angry|frustrated|neutral|satisfied}, escalation-reason*:ta](action=route-escalation)\n",
+          "request": "A major enterprise customer is threatening to cancel over billing errors."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2791",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:33 GMT",
+              "modal-function-call-id": "fc-01KWC8G099QBXKEYMFS5V6MF9V",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Callout with variant: error found",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/callout-variant.mjs",
+                "config": {
+                  "variant": "error"
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 7 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "63863d08-672a-4b0c-9d20-caa7173878af",
+        "latencyMs": 6267,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a clinical operations assistant (patient safety is top priority).\\nGenerate the MDMA document for exactly this DSL intent and nothing else:\\n\\ncallout#safety-review(variant=error)\\nform#procedure-submission-form[procedure-title*:t, change-type*:s{new|major-revision|minor-revision|retirement}, 
department*:s{radiology|cardiology|surgery|oncology|general}, author-credentials*:t, risk-category*:s{low|medium|high|critical}, clinical-summary*:ta, contraindications*:ta](action=review-procedure)\\n\"},{\"role\":\"user\",\"content\":\"I need to submit a new surgical procedure for the radiology department.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  A critical safety alert is required via a warning callout, followed by a comprehensive procedure submission form containing multi-step validation fields and a risk assessment section. The form is designed to trigger the 'review-procedure' action.\n```\n\n```mdma\ntype: callout\nid: safety-review\nvariant: error\ncontent: |\n  CRITICAL: All new or modified surgical procedures must undergo a mandatory multidisciplinary safety review prior to clinical deployment. Ensure all risk assessments are exhaustive and evidence-based.\n```\n\n```mdma\ntype: form\nid: procedure-submission-form\nfields:\n  - name: procedure-title\n    type: text\n    label: \"Procedure Title\"\n    required: true\n  - name: change-type\n    type: select\n    label: \"Nature of Change\"\n    required: true\n    options:\n      - label: \"New Procedure\"\n        value: \"new\"\n      - label: \"Major Revision\"\n        value: \"major-revision\"\n      - label: \"Minor Revision\"\n        value: \"minor-revision\"\n      - label: \"Retirement\"\n        value: \"retirement\"\n  - name: department\n    type: select\n    label: \"Department\"\n    required: true\n    options:\n      - label: \"Radiology\"\n        value: \"radiology\"\n      - label: \"Cardiology\"\n        value: \"cardiology\"\n      - label: \"Surgery\"\n        value: \"surgery\"\n      - label: \"Oncology\"\n        value: \"oncology\"\n      - label: \"General\"\n        value: \"general\"\n  - name: author-credentials\n    type: text\n    label: \"Author Credentials\"\n    required: true\n  - name: risk-category\n    type: select\n    label: \"Risk Categorization\"\n    required: true\n    options:\n      - label: \"Low Risk\"\n        value: \"low\"\n      - label: \"Medium Risk\"\n        value: \"medium\"\n      - label: \"High Risk\"\n        value: \"high\"\n      - label: \"Critical Risk\"\n        value: 
\"critical\"\n  - name: clinical-summary\n    type: textarea\n    label: \"Clinical Summary\"\n    required: true\n  - name: contraindications\n    type: textarea\n    label: \"Contraindications\"\n    required: true\nonSubmit: review-procedure\n```",
+          "tokenUsage": {
+            "total": 2156,
+            "prompt": 1590,
+            "completion": 566,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 6267,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2848",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:40 GMT",
+                "modal-function-call-id": "fc-01KWC8G6S3S1CANHBPDVJTPGBG",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Clinical Ops flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a clinical operations assistant (patient safety is top priority).\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#safety-review(variant=error)\nform#procedure-submission-form[procedure-title*:t, change-type*:s{new|major-revision|minor-revision|retirement}, department*:s{radiology|cardiology|surgery|oncology|general}, author-credentials*:t, risk-category*:s{low|medium|high|critical}, clinical-summary*:ta, contraindications*:ta](action=review-procedure)\n",
+            "request": "I need to submit a new surgical procedure for the radiology department."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/callout-variant.mjs",
+              "config": {
+                "variant": "error"
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 12,
+        "vars": {
+          "customPrompt": "You are a clinical operations assistant (patient safety is top priority).\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#safety-review(variant=error)\nform#procedure-submission-form[procedure-title*:t, change-type*:s{new|major-revision|minor-revision|retirement}, department*:s{radiology|cardiology|surgery|oncology|general}, author-credentials*:t, risk-category*:s{low|medium|high|critical}, clinical-summary*:ta, contraindications*:ta](action=review-procedure)\n",
+          "request": "I need to submit a new surgical procedure for the radiology department."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2848",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:40 GMT",
+              "modal-function-call-id": "fc-01KWC8G6S3S1CANHBPDVJTPGBG",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 4 sensitive flags",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/pii-sensitive.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 6 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "9bbc804e-14ca-4c57-a886-110b28128f8a",
+        "latencyMs": 6001,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are a compliance assistant for KYC/AML identity verification. 
Generate\\nthe MDMA document for exactly this DSL intent and nothing else:\\n\\nform#kyc-case-form[case-reference-id*:t, customer-type*:s{individual|business|trust}, full-legal-name*^:t, date-of-birth*^:d, tax-identifier*^:t, email*^:e, source-of-funds*:s{employment|business|investment|inheritance|other}, risk-rating*:s{low|medium|high|prohibited}](action=kyc-review)\\n\"},{\"role\":\"user\",\"content\":\"We have a new high-risk business entity that needs KYC verification.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  KYC Verification Form: Requires Case Reference ID, Customer Type, Full Legal Name, Date of Birth, Tax Identifier, Email, Source of Funds, and Risk Rating; all PII fields are marked sensitive and the case is submitted via kyc-review.\n```\n\n```mdma\ntype: form\nid: kyc-case-form\nfields:\n  - name: case-reference-id\n    type: text\n    label: \"Case Reference ID\"\n    required: true\n  - name: customer-type\n    type: select\n    label: \"Customer Type\"\n    required: true\n    options:\n      - label: \"Individual\"\n        value: \"individual\"\n      - label: \"Business\"\n        value: \"business\"\n      - label: \"Trust\"\n        value: \"trust\"\n  - name: full-legal-name\n    type: text\n    label: \"Full Legal Name\"\n    required: true\n    sensitive: true\n  - name: date-of-birth\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: tax-identifier\n    type: text\n    label: \"Tax Identifier\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: source-of-funds\n    type: select\n    label: \"Source of Funds\"\n    required: false\n    options:\n      - label: \"Employment\"\n        value: \"employment\"\n      - label: \"Business\"\n        value: \"business\"\n      - label: \"Investment\"\n        value: \"investment\"\n      - label: \"Inheritance\"\n        value: \"inheritance\"\n      - label: \"Other\"\n        value: \"other\"\n  - name: risk-rating\n    type: select\n    label: \"Risk Rating\"\n    required: false\n    options:\n      - label: \"Low\"\n        value: \"low\"\n      - label: \"Medium\"\n        value: \"medium\"\n      - label: \"High\"\n        value: \"high\"\n      - label: \"Prohibited\"\n        value: \"prohibited\"\nonSubmit: kyc-review\n```",
+          "tokenUsage": {
+            "total": 2139,
+            "prompt": 1597,
+            "completion": 542,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 6001,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2576",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:46 GMT",
+                "modal-function-call-id": "fc-01KWC8GCXY3MH3M03ZMJDJWWNA",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "KYC flow (DSL)",
+          "vars": {
+            "customPrompt": "You are a compliance assistant for KYC/AML identity verification. Generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#kyc-case-form[case-reference-id*:t, customer-type*:s{individual|business|trust}, full-legal-name*^:t, date-of-birth*^:d, tax-identifier*^:t, email*^:e, source-of-funds*:s{employment|business|investment|inheritance|other}, risk-rating*:s{low|medium|high|prohibited}](action=kyc-review)\n",
+            "request": "We have a new high-risk business entity that needs KYC verification."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/pii-sensitive.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 13,
+        "vars": {
+          "customPrompt": "You are a compliance assistant for KYC/AML identity verification. Generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#kyc-case-form[case-reference-id*:t, customer-type*:s{individual|business|trust}, full-legal-name*^:t, date-of-birth*^:d, tax-identifier*^:t, email*^:e, source-of-funds*:s{employment|business|investment|inheritance|other}, risk-rating*:s{low|medium|high|prohibited}](action=kyc-review)\n",
+          "request": "We have a new high-risk business entity that needs KYC verification."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2576",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:46 GMT",
+              "modal-function-call-id": "fc-01KWC8GCXY3MH3M03ZMJDJWWNA",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Only allowed components generated: callout, form",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/only-components.mjs",
+                "config": {
+                  "allowed": [
+                    "form",
+                    "callout"
+                  ]
+                }
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Assertion passed",
+              "assertion": {
+                "type": "contains",
+                "value": "id: benefits-enroll-2026-spring"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Select field has options",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/select-has-options.mjs"
+              }
+            },
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Found 6 required fields",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/has-required-fields.mjs",
+                "config": {
+                  "min": 3
+                }
+              }
+            }
+          ]
+        },
+        "id": "f22c874d-9b15-43ae-ad1c-5d4468147209",
+        "latencyMs": 5551,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.\\n\\nThe DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.\\n\\n## DSL input — the grammar you read\\n```\\n<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line\\nfield = <name>[*][^]:<typecode>[{opt1|opt2|...}]\\n        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)\\n        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file\\n        {a|b|c} = options for a select field\\nprops = text=\\\"...\\\"  |  action=<id>  |  variant=<name>\\ntypes: form · button · tasklist · table · callout · approval-gate · webhook · chart\\n```\\n\\n## Authoring rules\\n- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow (\\\"needs approval\\\", \\\"review process\\\", etc.). Describe any such follow-up in prose only; do not emit it.\\n- Each ```mdma block is exactly ONE component as top-level YAML keys (type, id, …). 
Never wrap a component in a \\\"components:\\\" array.\\n- Every component has \\\"id\\\" and \\\"type\\\" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).\\n- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.\\n- form: top-level \\\"onSubmit: <action-id>\\\"; \\\"fields\\\" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need \\\"options\\\" (list of {label, value}); mark every PII field \\\"sensitive: true\\\".\\n- button: \\\"text\\\" + \\\"onAction: <action-id>\\\". tasklist: \\\"items\\\" list of {id, text}. table: \\\"columns\\\" (key/header) + \\\"data\\\" rows. callout: \\\"content\\\" + variant ∈ info|warning|error|success. approval-gate: \\\"title\\\". webhook: \\\"url\\\" + \\\"trigger: <action-id>\\\". chart: \\\"label\\\" (never \\\"title\\\") + \\\"data: |\\\" CSV (header line then rows) + variant ∈ line|bar|area|pie.\\n- Forms use \\\"onSubmit\\\", buttons \\\"onAction\\\", webhooks \\\"trigger\\\" — never a bare \\\"action\\\" key.\\n- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).\\n\\n## Turn and reasoning discipline\\n- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is \\\"your turn\\\" to continue.\\n- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. 
Never repeat a token or phrase.\\n\\n## Examples\\n\\nIntent: `form#contact[full-name*:t, email*^:e](action=contact-submitted)`\\n\\n```mdma\\ntype: thinking\\nid: planning\\nstatus: done\\ncollapsed: true\\ncontent: |\\n  Contact form: a required name and a required, sensitive email; submits via contact-submitted.\\n```\\n\\n```mdma\\ntype: form\\nid: contact\\nfields:\\n  - name: full-name\\n    type: text\\n    label: \\\"Full Name\\\"\\n    required: true\\n  - name: email\\n    type: email\\n    label: \\\"Email\\\"\\n    required: true\\n    sensitive: true\\nonSubmit: contact-submitted\\n```\\n\\nIntent: `table#orders` — invent realistic columns and rows; default to sortable/filterable tables.\\n\\n```mdma\\ntype: table\\nid: orders\\nsortable: true\\nfilterable: true\\ncolumns:\\n  - key: order-id\\n    header: \\\"Order ID\\\"\\n    sortable: true\\n  - key: customer\\n    header: \\\"Customer\\\"\\n    sortable: true\\n  - key: total\\n    header: \\\"Total ($)\\\"\\n    sortable: true\\n  - key: status\\n    header: \\\"Status\\\"\\ndata:\\n  - { order-id: \\\"ORD-1001\\\", customer: \\\"Acme Inc\\\", total: 1240.50, status: \\\"Shipped\\\" }\\n  - { order-id: \\\"ORD-1002\\\", customer: \\\"Globex\\\", total: 880.00, status: \\\"Pending\\\" }\\n  - { order-id: \\\"ORD-1003\\\", customer: \\\"Initech\\\", total: 2310.75, status: \\\"Delivered\\\" }\\n```\\n\\nIntent: `chart#revenue(variant=bar)` — invent a realistic CSV `data` block and a `label`.\\n\\n```mdma\\ntype: chart\\nid: revenue\\nvariant: bar\\nlabel: \\\"Monthly Revenue\\\"\\ndata: |\\n  Month, Revenue\\n  Jan, 42000\\n  Feb, 51000\\n  Mar, 47500\\nxAxis: Month\\n```\\n\\n---\\n\\nYou are an employee benefits enrollment assistant (window closes Mar 31,\\n2026). 
Generate the MDMA document for exactly this DSL intent and nothing\\nelse (keep the form id):\\n\\ncallout#enrollment-deadline(variant=info)\\nform#benefits-enroll-2026-spring[employee-id*:t, health-plan*:s{hmo|ppo|hdhp|waive}, dental-plan*:s{basic|premium|waive}, vision-plan*:s{standard|premium|waive}, dependents-count*:n, hsa-contribution*:n](action=review-benefits-selection)\\n\"},{\"role\":\"user\",\"content\":\"I want to enroll in this year's health benefits.\"}]",
+          "label": "own-model/prompt-custom.mjs",
+          "config": {}
+        },
+        "promptId": "28560be25442348b67b153aac73d3d53694d44ce28b4b7e71a6c5e9ee9f65827",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: thinking\nid: planning\nstatus: done\ncollapsed: true\ncontent: |\n  Creating an enrollment form for Spring 2026 with selected health, dental, and vision plans, plus HSA and dependents; includes an info callout about the March 31 deadline.\n```\n\n```mdma\ntype: callout\nid: enrollment-deadline\nvariant: info\ncontent: \"Please complete your benefits enrollment by March 31, 2026, to ensure coverage begins on the next cycle.\"\n```\n\n```mdma\ntype: form\nid: benefits-enroll-2026-spring\nfields:\n  - name: employee-id\n    type: text\n    label: \"Employee ID\"\n    required: true\n  - name: health-plan\n    type: select\n    label: \"Health Insurance Plan\"\n    required: true\n    options:\n      - label: \"HMO\"\n        value: \"hmo\"\n      - label: \"PPO\"\n        value: \"ppo\"\n      - label: \"HDHP\"\n        value: \"hdhp\"\n      - label: \"Waive Coverage\"\n        value: \"waive\"\n  - name: dental-plan\n    type: select\n    label: \"Dental Insurance Plan\"\n    required: true\n    options:\n      - label: \"Basic Dental\"\n        value: \"basic\"\n      - label: \"Premium Dental\"\n        value: \"premium\"\n      - label: \"Waive Coverage\"\n        value: \"waive\"\n  - name: vision-plan\n    type: select\n    label: \"Vision Insurance Plan\"\n    required: true\n    options:\n      - label: \"Standard Vision\"\n        value: \"standard\"\n      - label: \"Premium Vision\"\n        value: \"premium\"\n      - label: \"Waive Coverage\"\n        value: \"waive\"\n  - name: dependents-count\n    type: number\n    label: \"Number of Dependents\"\n    required: true\n  - name: hsa-contribution\n    type: number\n    label: \"Annual HSA Contribution\"\n    required: true\nonSubmit: review-benefits-selection\n```",
+          "tokenUsage": {
+            "total": 2097,
+            "prompt": 1601,
+            "completion": 496,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 5551,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2402",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:36:52 GMT",
+                "modal-function-call-id": "fc-01KWC8GJTXQYJ8HJ2EDT024RWZ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Benefits enrollment flow, exact id (DSL)",
+          "vars": {
+            "customPrompt": "You are an employee benefits enrollment assistant (window closes Mar 31,\n2026). Generate the MDMA document for exactly this DSL intent and nothing\nelse (keep the form id):\n\ncallout#enrollment-deadline(variant=info)\nform#benefits-enroll-2026-spring[employee-id*:t, health-plan*:s{hmo|ppo|hdhp|waive}, dental-plan*:s{basic|premium|waive}, vision-plan*:s{standard|premium|waive}, dependents-count*:n, hsa-contribution*:n](action=review-benefits-selection)\n",
+            "request": "I want to enroll in this year's health benefits."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/only-components.mjs",
+              "config": {
+                "allowed": [
+                  "form",
+                  "callout"
+                ]
+              }
+            },
+            {
+              "type": "contains",
+              "value": "id: benefits-enroll-2026-spring"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/select-has-options.mjs"
+            },
+            {
+              "type": "javascript",
+              "value": "file://assertions/has-required-fields.mjs",
+              "config": {
+                "min": 3
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 14,
+        "vars": {
+          "customPrompt": "You are an employee benefits enrollment assistant (window closes Mar 31,\n2026). Generate the MDMA document for exactly this DSL intent and nothing\nelse (keep the form id):\n\ncallout#enrollment-deadline(variant=info)\nform#benefits-enroll-2026-spring[employee-id*:t, health-plan*:s{hmo|ppo|hdhp|waive}, dental-plan*:s{basic|premium|waive}, vision-plan*:s{standard|premium|waive}, dependents-count*:n, hsa-contribution*:n](action=review-benefits-selection)\n",
+          "request": "I want to enroll in this year's health benefits."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2402",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:36:52 GMT",
+              "modal-function-call-id": "fc-01KWC8GJTXQYJ8HJ2EDT024RWZ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      }
+    ],
+    "stats": {
+      "successes": 15,
+      "failures": 0,
+      "errors": 0,
+      "tokenUsage": {
+        "prompt": 23627,
+        "completion": 6114,
+        "cached": 0,
+        "total": 29741,
+        "numRequests": 15,
+        "completionDetails": {
+          "reasoning": 0,
+          "acceptedPrediction": 0,
+          "rejectedPrediction": 0,
+          "cacheReadInputTokens": 0,
+          "cacheCreationInputTokens": 0
+        },
+        "assertions": {
+          "total": 0,
+          "prompt": 0,
+          "completion": 0,
+          "cached": 0,
+          "numRequests": 0,
+          "completionDetails": {
+            "reasoning": 0,
+            "acceptedPrediction": 0,
+            "rejectedPrediction": 0,
+            "cacheReadInputTokens": 0,
+            "cacheCreationInputTokens": 0
+          }
+        }
+      },
+      "durationMs": 65274,
+      "evaluationDurationMs": 65274
+    }
+  },
+  "config": {
+    "tags": {},
+    "description": "MDMA Example Flows Eval (DSL) — own model",
+    "prompts": [
+      "file:///Users/marcinsadowski/GIT/mr-mdma/evals/own-model/prompt-custom.mjs"
+    ],
+    "providers": [
+      {
+        "id": "openai:chat:mdma-26b",
+        "config": {
+          "apiBaseUrl": "https://REDACTED.modal.run/v1",
+          "apiKey": "[REDACTED]",
+          "temperature": 1,
+          "max_tokens": 2048,
+          "min_p": 0.02,
+          "repetition_penalty": 1.1,
+          "chat_template_kwargs": {
+            "enable_thinking": false
+          }
+        }
+      }
+    ],
+    "tests": [
+      {
+        "description": "Contact Form flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a website assistant. When a visitor wants to contact the company,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\n",
+          "request": "I'd like to send a message to your support team."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Bug Report flow (DSL)",
+        "vars": {
+          "customPrompt": "You are an engineering support assistant. When a user reports a bug,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#pagerduty-reminder(variant=warning)\nform#bug-report-form[title*:t, reporter-email*^:e, severity*:s{P0|P1|P2|P3}, affected-component*:s{frontend|backend|database|auth|payments}, steps-to-reproduce*:ta, expected-behavior*:ta, actual-behavior*:ta](action=triage-bug-report)\n",
+          "request": "The checkout page is showing a 500 error for all users in production."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Employee Onboarding flow (DSL)",
+        "vars": {
+          "customPrompt": "You are an HR onboarding assistant. For the new hire's first-week setup,\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#personal-info-form[full-name*:t, preferred-name:t, personal-email*^:e, phone^:t, emergency-contact*^:t](action=review-onboarding-info)\n",
+          "request": "Hi, I'm starting next Monday in the engineering team!"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 1
+            }
+          }
+        ]
+      },
+      {
+        "description": "Budget Approval flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a finance assistant (requests over $5,000 need director approval).\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#budget-approval-warning(variant=warning)\nform#budget-request-form[requester-name*:t, requester-email*^:e, department*:s{engineering|marketing|operations|finance}, requested-amount*:n, business-justification*:ta](action=route-budget-request)\n",
+          "request": "I need to request $12,000 for new development servers."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Customer Survey flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a customer experience assistant collecting post-support feedback.\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\nform#support-survey-form[support-ticket-id*:t, overall-satisfaction*:s{1|2|3|4|5}, response-time-rating*:s{1|2|3|4|5}, issue-resolved:c, additional-comments:ta, email^:e](action=survey-submitted)\n",
+          "request": "I just finished a support call and want to leave feedback."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 1
+            }
+          }
+        ]
+      },
+      {
+        "description": "Sales Dashboard flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a sales analytics assistant. For the Q1 overview (total $2.34M,\n111% of target; Jan $720K, Feb $780K, Mar $840K; by product Platform $980K,\nAnalytics $640K, Integrations $420K, Services $300K), generate the MDMA\ndocument for exactly this DSL intent and nothing else (use that data):\n\ncallout#q1-summary(variant=info)\nchart#revenue-trend(variant=line)\nchart#product-breakdown(variant=bar)\n",
+          "request": "Give me the Q1 sales overview with charts."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "chart",
+                "callout",
+                "table"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/component-count.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Incident Triage flow (DSL)",
+        "vars": {
+          "customPrompt": "You are an incident response assistant. Generate the MDMA document for\nexactly this DSL intent and nothing else:\n\nform#incident-intake-form[incident-title*:t, reporter-email*^:e, severity*:s{P1|P2|P3|P4}, affected-systems*:t, description*:ta](action=triage-incident)\n",
+          "request": "Our payment processing service is completely down in production."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Feature Request flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a product operations assistant (5-day review process). Generate the\nMDMA document for exactly this DSL intent and nothing else:\n\ncallout#review-process(variant=info)\nform#feature-request-form[feature-title*:t, requester-name*:t, requesting-team*:s{engineering|sales|cs|marketing|product}, priority*:s{critical|high|medium|low}, description*:ta, primary-use-case*:ta](action=evaluate-feature-request)\n",
+          "request": "We need a bulk export feature for our enterprise customers."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Sprint Retro flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a scrum master assistant. For Sprint 24 (velocity 42 of 45 target;\nhistory 38,44,41,43,47; bugs Auth 5, API 3, Frontend 7, DB 2, CI/CD 1),\ngenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#sprint-info(variant=info)\nchart#velocity-trend(variant=line)\nchart#bug-distribution(variant=bar)\n",
+          "request": "Let's start the Sprint 24 retrospective."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "chart",
+                "callout",
+                "form"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/component-count.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Order Tracking flow (DSL)",
+        "vars": {
+          "customPrompt": "You are an e-commerce customer service assistant. For order ORD-2024-78542\n(in transit, ETA Mar 22; items Wireless Keyboard, USB-C Hub x2, Monitor\nStand, Cable Kit), generate the MDMA document for exactly this DSL intent\nand nothing else:\n\ncallout#order-status(variant=info)\ntable#order-items\ntasklist#delivery-milestones\n",
+          "request": "Where is my order ORD-2024-78542?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "table",
+                "callout",
+                "tasklist",
+                "chart",
+                "button"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/component-count.mjs",
+            "config": {
+              "min": 2
+            }
+          }
+        ]
+      },
+      {
+        "description": "Change Management flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a release management assistant (SOX/ISO compliance). Generate the\nMDMA document for exactly this DSL intent and nothing else:\n\ncallout#risk-assessment-warning(variant=warning)\nform#change-request-form[change-request-id*:t, jira-ticket*:t, change-title*:t, change-type*:s{standard|normal|emergency}, target-environment*:s{production|staging|pre-prod}, risk-level*:s{low|medium|high|critical}, change-description*:ta, rollback-plan*:ta](action=review-change-request)\n",
+          "request": "I need to deploy a database migration to production this weekend."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/callout-variant.mjs",
+            "config": {
+              "variant": "warning"
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Customer Escalation flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a customer support escalation assistant with SLA tracking. Generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\ncallout#sla-compliance(variant=warning)\nform#escalation-intake-form[case-id*:t, customer-name*:t, customer-email*^:e, account-id*^:t, priority*:s{P1|P2|P3|P4}, category*:s{billing|technical|account|other}, customer-sentiment*:s{angry|frustrated|neutral|satisfied}, escalation-reason*:ta](action=route-escalation)\n",
+          "request": "A major enterprise customer is threatening to cancel over billing errors."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Clinical Ops flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a clinical operations assistant (patient safety is top priority).\nGenerate the MDMA document for exactly this DSL intent and nothing else:\n\ncallout#safety-review(variant=error)\nform#procedure-submission-form[procedure-title*:t, change-type*:s{new|major-revision|minor-revision|retirement}, department*:s{radiology|cardiology|surgery|oncology|general}, author-credentials*:t, risk-category*:s{low|medium|high|critical}, clinical-summary*:ta, contraindications*:ta](action=review-procedure)\n",
+          "request": "I need to submit a new surgical procedure for the radiology department."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/callout-variant.mjs",
+            "config": {
+              "variant": "error"
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "KYC flow (DSL)",
+        "vars": {
+          "customPrompt": "You are a compliance assistant for KYC/AML identity verification. Generate\nthe MDMA document for exactly this DSL intent and nothing else:\n\nform#kyc-case-form[case-reference-id*:t, customer-type*:s{individual|business|trust}, full-legal-name*^:t, date-of-birth*^:d, tax-identifier*^:t, email*^:e, source-of-funds*:s{employment|business|investment|inheritance|other}, risk-rating*:s{low|medium|high|prohibited}](action=kyc-review)\n",
+          "request": "We have a new high-risk business entity that needs KYC verification."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/pii-sensitive.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      },
+      {
+        "description": "Benefits enrollment flow, exact id (DSL)",
+        "vars": {
+          "customPrompt": "You are an employee benefits enrollment assistant (window closes Mar 31,\n2026). Generate the MDMA document for exactly this DSL intent and nothing\nelse (keep the form id):\n\ncallout#enrollment-deadline(variant=info)\nform#benefits-enroll-2026-spring[employee-id*:t, health-plan*:s{hmo|ppo|hdhp|waive}, dental-plan*:s{basic|premium|waive}, vision-plan*:s{standard|premium|waive}, dependents-count*:n, hsa-contribution*:n](action=review-benefits-selection)\n",
+          "request": "I want to enroll in this year's health benefits."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/only-components.mjs",
+            "config": {
+              "allowed": [
+                "form",
+                "callout"
+              ]
+            }
+          },
+          {
+            "type": "contains",
+            "value": "id: benefits-enroll-2026-spring"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/select-has-options.mjs"
+          },
+          {
+            "type": "javascript",
+            "value": "file://assertions/has-required-fields.mjs",
+            "config": {
+              "min": 3
+            }
+          }
+        ]
+      }
+    ],
+    "scenarios": [],
+    "env": {},
+    "defaultTest": {
+      "assert": [
+        {
+          "type": "javascript",
+          "value": "file://assertions/validate-mdma.mjs",
+          "config": {
+            "exclude": [
+              "flow-ordering"
+            ]
+          }
+        }
+      ],
+      "vars": {},
+      "options": {},
+      "metadata": {}
+    },
+    "outputPath": [
+      "own-model/results-flows.json"
+    ],
+    "extensions": [],
+    "metadata": {},
+    "evaluateOptions": {}
+  },
+  "shareableUrl": null,
+  "metadata": {
+    "promptfooVersion": "0.121.9",
+    "nodeVersion": "v22.22.0",
+    "platform": "darwin",
+    "arch": "arm64",
+    "exportedAt": "2026-06-30T12:36:59.230Z",
+    "evaluationCreatedAt": "2026-06-30T12:35:53.402Z"
+  }
+}
\ No newline at end of file
diff --git a/evals/own-model/results-guidance.json b/evals/own-model/results-guidance.json
new file mode 100644
index 0000000..0a4da2e
--- /dev/null
+++ b/evals/own-model/results-guidance.json
@@ -0,0 +1,2063 @@
+{
+  "evalId": "eval-LAg-2026-06-30T12:38:16",
+  "results": {
+    "version": 3,
+    "timestamp": "2026-06-30T12:38:16.261Z",
+    "prompts": [
+      {
+        "raw": "function ({ vars }) {\n  return [\n    { role: 'system', content: `{% raw %}${SYSTEM_PROMPT}{% endraw %}` },\n    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` },\n  ];\n}",
+        "label": "own-model/prompt-guidance.mjs",
+        "config": {},
+        "id": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "provider": "openai:chat:mdma-26b",
+        "metrics": {
+          "score": 15,
+          "testPassCount": 15,
+          "testFailCount": 0,
+          "testErrorCount": 0,
+          "assertPassCount": 15,
+          "assertFailCount": 0,
+          "totalLatencyMs": 58193,
+          "tokenUsage": {
+            "prompt": 3369,
+            "completion": 6872,
+            "cached": 0,
+            "total": 10241,
+            "numRequests": 15,
+            "completionDetails": {
+              "reasoning": 0,
+              "acceptedPrediction": 0,
+              "rejectedPrediction": 0,
+              "cacheReadInputTokens": 0,
+              "cacheCreationInputTokens": 0
+            },
+            "assertions": {
+              "total": 0,
+              "prompt": 0,
+              "completion": 0,
+              "cached": 0,
+              "numRequests": 0,
+              "completionDetails": {
+                "reasoning": 0,
+                "acceptedPrediction": 0,
+                "rejectedPrediction": 0,
+                "cacheReadInputTokens": 0,
+                "cacheCreationInputTokens": 0
+              }
+            }
+          },
+          "namedScores": {},
+          "namedScoresCount": {},
+          "namedScoreWeights": {},
+          "cost": 0
+        }
+      }
+    ],
+    "results": [
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "9082ab2a-5c69-4ef2-bd9b-ee6f00fd232a",
+        "latencyMs": 2175,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Create a contact form with fields for full name, email address, and a message.\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-b35e7453c16e9ae6",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# Contact Us\\n\\nPlease use the form below to get in touch with our team. We will get back to you as soon as possible.\\n\\n[[form]]\\n[[form.field:full_name:text:required:label=Full Name:placeholder=John Doe]]\\n[[form.field:email:email:required:label=Email Address:placeholder=john@example.com]]\\n[[form.field:message:textarea:required:label=Your Message:placeholder=How can we help you?]]\\n[[form.submit:label=Send Message:webhook=https://hooks.example.com/contact-form-received]]\\n\\n#[[callout:info:Contact Information]]\\n**Email:** support@example.com\\n**Phone:** +1 (555) 000-0000\\n**Hours:** Mon-Fri, 9am - 5pm EST\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 429,
+            "prompt": 224,
+            "completion": 205,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2175,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1409",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:16 GMT",
+                "modal-function-call-id": "fc-01KWC8K4CNG5TEHZPCQHPBR7CV",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Contact form request calls generate_mdma",
+          "vars": {
+            "request": "Create a contact form with fields for full name, email address, and a message."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 0,
+        "vars": {
+          "request": "Create a contact form with fields for full name, email address, and a message."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1409",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:16 GMT",
+              "modal-function-call-id": "fc-01KWC8K4CNG5TEHZPCQHPBR7CV",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "73238bd8-0e6e-40c5-bf96-11cd6410765d",
+        "latencyMs": 5270,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Build an employee onboarding flow with three steps: personal information, IT equipment setup, and policy acknowledgment.\\n\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-896537d1a41c6994",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# 🚀 Employee Onboarding Flow\\n\\nWelcome to the team! Please complete the following onboarding steps to get everything set up for your first week.\\n\\n---\\n\\n## 📋 Step 1: Personal Information\\n*Please ensure your contact details and emergency information are up to date.*\\n\\n| Field | Input Type | Placeholder/Help |\\n| :--- | :--- | :--- |\\n| **Full Legal Name** | `text` | As it appears on government ID |\\n| **Date of Birth** | `date` | MM/DD/YYYY |\\n| **Personal Email** | `email` | For non-work communications |\\n| **Phone Number** | `tel` | Primary contact number |\\n| **Emergency Contact Name** | `text` | Full Name |\\n| **Emergency Contact Phone** | `tel` | Phone number |\\n\\n<button label=\\\"Submit Personal Info\\\" action=\\\"submit_personal_info\\\" />\\n\\n---\\n\\n## 💻 Step 2: IT Equipment Setup\\n*Select the hardware you require to perform your role. Your manager will approve these requests.*\\n\\n<callout type=\\\"info\\\" title=\\\"Equipment Policy\\\">\\nAll company-issued hardware must be returned upon termination of employment.\\n</callout>\\n\\n| Equipment Item | Selection | Notes |\\n| :--- | :--- | :--- |\\n| **Laptop Model** | `select(MacBook Pro, Dell XPS, Lenovo ThinkPad)` | Choose based on role |\\n| **Monitor** | `checkbox(Single, Dual, None)` | Standard is Dual |\\n| **Peripherals** | `multiselect(Mouse, Keyboard, Headset, Webcam)` | Check all that apply |\\n| **Special Requirements** | `textarea` | Any accessibility or hardware needs |\\n\\n<approval-gate id=\\\"it_approval\\\" title=\\\"IT Hardware Approval\\\" required=\\\"true\\\">\\n  *Approval is required before equipment is dispatched.*\\n</approval-gate>\\n\\n<button label=\\\"Submit IT Request\\\" action=\\\"submit_it_request\\\" />\\n\\n---\\n\\n## ⚖️ Step 3: Policy Acknowledgment\\n*Please review and confirm your understanding of the following company policies.*\\n\\n<tasklist>\\n- [ ] **Code of Conduct**: Review our 
standards for professional behavior and ethics.\\n- [ ] **Data Privacy & Security**: Understand how we handle sensitive company and client data.\\n- [ ] **Remote Work Policy**: Review guidelines for working from home and security protocols.\\n- [ ] **Anti-Harassment Policy**: Our commitment to a safe and inclusive workplace.\\n</tasklist>\\n\\n> [!IMPORTANT]\\n> You must complete all acknowledgments before the onboarding process can be finalized.\\n\\n<webhook url=\\\"https://api.company.com/onboarding/complete\\\" method=\\\"POST\\\" />\\n\\n---\\n\\n### 👋 Next Steps\\nOnce all three steps are completed and approved, your onboarding dashboard will be unlocked, giving you access to your team's Slack channel, JIRA boards, and upcoming training sessions.\\n\\n**Questions?** Reach out to #help-hr on Slack.\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 888,
+            "prompt": 227,
+            "completion": 661,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 5270,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "3557",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:18 GMT",
+                "modal-function-call-id": "fc-01KWC8K68KGFW2X3CZYCHK85EQ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Onboarding flow request calls generate_mdma",
+          "vars": {
+            "request": "Build an employee onboarding flow with three steps: personal information, IT equipment setup, and policy acknowledgment.\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 1,
+        "vars": {
+          "request": "Build an employee onboarding flow with three steps: personal information, IT equipment setup, and policy acknowledgment.\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "3557",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:18 GMT",
+              "modal-function-call-id": "fc-01KWC8K68KGFW2X3CZYCHK85EQ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "731948ef-0150-422a-92e5-adb60bae72d1",
+        "latencyMs": 3141,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"I need a bug report form for my engineering team. It should capture the title, severity, steps to reproduce, and an optional file attachment.\\n\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-bfa158702757aca6",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# 🐞 Bug Report Form\\n\\nPlease fill out the details below to report a technical issue. The engineering team will review this report to prioritize and resolve the bug.\\n\\n---\\n\\n## 📋 Bug Details\\n\\n| Field | Input / Selection |\\n| :--- | :--- |\\n| **Bug Title** | [Enter a concise, descriptive title here] |\\n| **Severity** | <select><option>Low (UI/Minor annoyance)</option><option>Medium (Feature broken, workarounds exist)</option><option>High (Major feature broken, no workaround)</option><option>Critical (System down/Data loss)</option></select> |\\n| **Environment** | <select><option>Production</option><option>Staging</option><option>Development/Local</option></select> |\\n\\n## 🛠️ Steps to Reproduce\\n\\nPlease provide a numbered list of actions taken to trigger the bug:\\n\\n1.  \\n2.  \\n3.  \\n\\n> [!TIP]\\n> Be as specific as possible about what you clicked or what data you entered.\\n\\n## 🖼️ Attachments & Evidence\\n\\nIf you have screenshots, logs, or video recordings of the issue, please upload them below.\\n\\n**File Upload:**\\n[Attach File](https://example.com/upload-endpoint)\\n\\n---\\n\\n## 🚀 Submission\\n\\n<button action=\\\"webhook\\\" url=\\\"https://api.yourteam.com/bug-reports\\\" label=\\\"Submit Bug Report\\\" />\\n\\n<button action=\\\"reset\\\" label=\\\"Clear Form\\\" />\\n\\n***\\n\\n*Form generated by MDMA Assistant. Please ensure all \\\"High\\\" and \\\"Critical\\\" bugs are also logged in your primary tracking system (Jira/Linear).*\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 611,
+            "prompt": 236,
+            "completion": 375,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3141,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2296",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:23 GMT",
+                "modal-function-call-id": "fc-01KWC8KBE733NWX4BAN3AEGT0F",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Bug report form calls generate_mdma",
+          "vars": {
+            "request": "I need a bug report form for my engineering team. It should capture the title, severity, steps to reproduce, and an optional file attachment.\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 2,
+        "vars": {
+          "request": "I need a bug report form for my engineering team. It should capture the title, severity, steps to reproduce, and an optional file attachment.\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2296",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:23 GMT",
+              "modal-function-call-id": "fc-01KWC8KBE733NWX4BAN3AEGT0F",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "a0708fc2-6469-402a-b292-84308c7f1e5d",
+        "latencyMs": 4279,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Create a customer satisfaction survey with rating questions for product quality, support experience, and likelihood to recommend.\\n\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-a5bd796e36ce0d85",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# Customer Satisfaction Survey\\n\\nThank you for your recent experience with us! We value your feedback and would love to hear your thoughts so we can improve our products and services.\\n\\n---\\n\\n### 🛠 Product Quality\\n*How satisfied are you with the quality and reliability of our products?*\\n\\n| Criteria | 1 (Very Poor) | 2 | 3 | 4 | 5 (Excellent) |\\n| :--- | :---: | :---: | :---: | :---: | :---: |\\n| **Durability & Build** | ◯ | ◯ | ◯ | ◯ | ◯ |\\n| **Ease of Use** | ◯ | ◯ | ◯ | ◯ | ◯ |\\n| **Feature Set** | ◯ | ◯ | ◯ | ◯ | ◯ |\\n\\n---\\n\\n### 🎧 Support Experience\\n*Please rate your recent interactions with our customer support team.*\\n\\n| Criteria | 1 (Poor) | 2 | 3 | 4 | 5 (Great) |\\n| :--- | :---: | :---: | :---: | :---: | :---: |\\n| **Response Time** | ◯ | ◯ | ◯ | ◯ | ◯ |\\n| **Technical Knowledge** | ◯ | ◯ | ◯ | ◯ | ◯ |\\n| **Resolution Speed** | ◯ | ◯ | ◯ | ◯ | ◯ |\\n\\n---\\n\\n### 🚀 Loyalty & Recommendation\\n*Your likelihood to recommend us to a friend or colleague.*\\n\\n**On a scale of 0–10, how likely are you to recommend us?**\\n0 (Not at all likely) —————— 10 (Extremely likely)\\n\\n[ 0 ] [ 1 ] [ 2 ] [ 3 ] [ 4 ] [ 5 ] [ 6 ] [ 7 ] [ 8 ] [ 9 ] [ 10 ]\\n\\n---\\n\\n### ✍️ Additional Comments\\n*Is there anything else you'd like to share with us?*\\n\\n[ Text Area: Please type your feedback here... ]\\n\\n---\\n\\n### 📬 Final Step\\n> [!IMPORTANT]\\n> Please ensure you have filled out all required fields before clicking submit.\\n\\n[ Submit Survey ] {{ webhook: \\\"https://api.example.com/v1/survey-submit\\\" }}\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 756,
+            "prompt": 227,
+            "completion": 529,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4279,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2436",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:26 GMT",
+                "modal-function-call-id": "fc-01KWC8KEH8X9QFK99MCFNT7HDS",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Customer satisfaction survey calls generate_mdma",
+          "vars": {
+            "request": "Create a customer satisfaction survey with rating questions for product quality, support experience, and likelihood to recommend.\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 3,
+        "vars": {
+          "request": "Create a customer satisfaction survey with rating questions for product quality, support experience, and likelihood to recommend.\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2436",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:26 GMT",
+              "modal-function-call-id": "fc-01KWC8KEH8X9QFK99MCFNT7HDS",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "9a3a47a4-c8fd-4828-b4b2-db39eb2afc66",
+        "latencyMs": 7093,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Make an expense reimbursement approval workflow: the employee submits amount and category, then a manager approves or rejects with a comment.\\n\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-8a3d20eaf8ba813b",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# Expense Reimbursement Workflow\\n\\nThis workflow manages the end-to-end process of employee expense claims, from initial submission to final manager approval.\\n\\n## Phase 1: Employee Submission\\n*Please fill out the form below to request reimbursement for your business expenses.*\\n\\n---\\n\\n### 📝 Expense Claim Form\\n> [!IMPORTANT]\\n> Ensure all uploaded receipts match the total amount entered to avoid delays in processing.\\n\\n<form \\n  action=\\\"https://api.yourcompany.com/expenses/submit\\\" \\n  method=\\\"POST\\\"\\n>\\n  <fieldset>\\n    <legend>Claim Details</legend>\\n    \\n    <label for=\\\"amount\\\">Total Amount (USD)</label>\\n    <input type=\\\"number\\\" id=\\\"amount\\\" name=\\\"amount\\\" placeholder=\\\"0.00\\\" required step=\\\"0.01\\\" />\\n\\n    <label for=\\\"category\\\">Expense Category</label>\\n    <select id=\\\"category\\\" name=\\\"category\\\" required>\\n      <option value=\\\"\\\" disabled selected>Select a category...</option>\\n      <option value=\\\"travel\\\">Travel (Flights, Hotel, Rail)</option>\\n      <option value=\\\"meals\\\">Meals & Entertainment</option>\\n      <option value=\\\"transport\\\">Ground Transport (Uber, Taxi, Parking)</option>\\n      <option value=\\\"supplies\\\">Office Supplies</option>\\n      <option value=\\\"other\\\">Other</option>\\n    </select>\\n\\n    <label for=\\\"description\\\">Business Purpose / Description</label>\\n    <textarea id=\\\"description\\\" name=\\\"description\\\" placeholder=\\\"Briefly explain the business need for this expense...\\\" required />\\n\\n    <label for=\\\"receipt\\\">Attach Receipt (Image/PDF)</label>\\n    <input type=\\\"file\\\" id=\\\"receipt\\\" name=\\\"receipt\\\" accept=\\\"image/*,.pdf\\\" required />\\n  </fieldset>\\n\\n  <button type=\\\"submit\\\" variant=\\\"primary\\\">Submit for Approval</button>\\n</form>\\n\\n---\\n\\n## Phase 2: Manager Review\\n*This section is visible 
only to designated approving managers once a claim has been submitted.*\\n\\n### 🔍 Pending Approval\\nThe following expense claim requires your immediate attention:\\n\\n| Item | Detail |\\n| :--- | :--- |\\n| **Employee** | Jane Doe |\\n| **Amount** | $142.50 |\\n| **Category** | Travel |\\n| **Description** | Client lunch meeting - Q3 Strategy |\\n\\n<callout type=\\\"info\\\">\\n**Approval Guidelines:** Claims exceeding $500 require additional VP authorization. Please verify that the expense aligns with the department budget.\\n</callout>\\n\\n### 🗳️ Decision Panel\\n<form action=\\\"https://api.yourcompany.com/expenses/approve\\\" method=\\\"POST\\\">\\n  <label for=\\\"manager-comment\\\">Reviewer Comments</label>\\n  <textarea id=\\\"manager-comment\\\" name=\\\"comment\\\" placeholder=\\\"Provide reasoning for rejection or notes for accounting...\\\" />\\n\\n  <div style=\\\"display: flex; gap: 10px;\\\">\\n    <button type=\\\"submit\\\" name=\\\"action\\\" value=\\\"approve\\\" variant=\\\"success\\\">Approve Claim</button>\\n    <button type=\\\"submit\\\" name=\\\"action\\\" value=\\\"reject\\\" variant=\\\"danger\\\">Reject Claim</button>\\n  </div>\\n</form>\\n\\n---\\n\\n## Workflow Status Summary\\nOnce a decision is made, the status will be updated and the user notified.\\n\\n| Role | Action | Status |\\n| :--- | :--- | :--- |\\n| Employee | Submit Claim | ✅ Completed |\\n| Manager | Review & Decision | ⏳ Pending |\\n| Finance | Process Payment | 🌑 Awaiting Approval |\\n\\n<webhook url=\\\"https://api.yourcompany.com/webhooks/expense-events\\\" />\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 1048,
+            "prompt": 231,
+            "completion": 817,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 7093,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "4336",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:30 GMT",
+                "modal-function-call-id": "fc-01KWC8KJR600HBJ9HPY492BQYT",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Expense approval workflow calls generate_mdma",
+          "vars": {
+            "request": "Make an expense reimbursement approval workflow: the employee submits amount and category, then a manager approves or rejects with a comment.\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 4,
+        "vars": {
+          "request": "Make an expense reimbursement approval workflow: the employee submits amount and category, then a manager approves or rejects with a comment.\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "4336",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:30 GMT",
+              "modal-function-call-id": "fc-01KWC8KJR600HBJ9HPY492BQYT",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "218fd67c-ff05-4742-8171-b1d9bb7d37d6",
+        "latencyMs": 6754,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Build a sales pipeline dashboard showing deals grouped by stage with a bar chart of revenue by month.\\n\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-a44637a72e677ae9",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# 🚀 Sales Pipeline Dashboard\\n\\n> **Overview**: This dashboard provides a real-time view of the current sales pipeline, stage distribution, and monthly revenue trends.\\n\\n---\\n\\n## 📊 Revenue by Month\\n{\\n  \\\"type\\\": \\\"chart\\\",\\n  \\\"chartType\\\": \\\"bar\\\",\\n  \\\"title\\\": \\\"Monthly Projected Revenue\\\",\\n  \\\"data\\\": [\\n    {\\\"month\\\": \\\"Jan\\\", \\\"revenue\\\": 45000},\\n    {\\\"month\\\": \\\"Feb\\\", \\\"revenue\\\": 52000},\\n    {\\\"month\\\": \\\"Mar\\\", \\\"revenue\\\": 38000},\\n    {\\\"month\\\": \\\"Apr\\\", \\\"revenue\\\": 61000},\\n    {\\\"month\\\": \\\"May\\\", \\\"revenue\\\": 55000},\\n    {\\\"month\\\": \\\"Jun\\\", \\\"revenue\\\": 67000}\\n  ],\\n  \\\"xAxis\\\": \\\"month\\\",\\n  \\\"yAxis\\\": \\\"revenue\\\",\\n  \\\"yAxisLabel\\\": \\\"Revenue ($)\\\"\\n}\\n\\n---\\n\\n## 📑 Deal Pipeline by Stage\\n\\n| Stage | Deal Name | Account | Deal Value | Owner | Last Activity |\\n| :--- | :--- | :--- | :--- | :--- | :--- |\\n| 🟢 **Discovery** | Cloud Migration | Acme Corp | $12,000 | Sarah K. | 2h ago |\\n| 🟢 **Discovery** | API Integration | Globex Inc | $8,500 | Mike R. | Yesterday |\\n| 🟡 **Proposal** | Security Audit | Stark Ind | $45,000 | Sarah K. | 1d ago |\\n| 🟡 **Proposal** | Data Warehouse | Wayne Ent | $22,000 | David L. | 3h ago |\\n| 🟠 **Negotiation** | SaaS Renewal | Cyberdyne | $15,000 | Mike R. | 10m ago |\\n| 🟠 **Negotiation** | Mobile App Dev | Umbrella Co | $35,000 | Sarah K. | 2d ago |\\n| 🔵 **Closed Won** | Network Upgrade | Initech | $28,000 | David L. 
| Completed |\\n\\n---\\n\\n## 🛠 Quick Actions\\n\\n{\\n  \\\"type\\\": \\\"button_group\\\",\\n  \\\"buttons\\\": [\\n    {\\n      \\\"label\\\": \\\"➕ Create New Deal\\\",\\n      \\\"action\\\": \\\"open_form\\\",\\n      \\\"formId\\\": \\\"new-deal-form\\\"\\n    },\\n    {\\n      \\\"label\\\": \\\"📥 Export Pipeline (CSV)\\\",\\n      \\\"action\\\": \\\"webhook\\\",\\n      \\\"url\\\": \\\"https://api.yoursalescrm.com/export/deals\\\"\\n    },\\n    {\\n      \\\"label\\\": \\\"🔄 Refresh Dashboard\\\",\\n      \\\"action\\\": \\\"refresh\\\"\\n    }\\n  ]\\n}\\n\\n---\\n\\n## 🚨 High Priority Alerts\\n\\n:::warning[Immediate Attention Required]\\nThe following deals have been stuck in the **Proposal** stage for more than 10 days:\\n- **Security Audit** ($45,000) - No contact in 12 days.\\n- **Data Warehouse** ($22,000) - Awaiting legal review.\\n:::\\n\\n:::info[Pipeline Health]\\nYour average deal cycle is currently **24 days**. This is **4 days faster** than last month. 📈\\n:::\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 982,
+            "prompt": 226,
+            "completion": 756,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 6754,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "3391",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:38 GMT",
+                "modal-function-call-id": "fc-01KWC8KSQ5AME9YR648XKSYXYK",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Sales dashboard request calls generate_mdma",
+          "vars": {
+            "request": "Build a sales pipeline dashboard showing deals grouped by stage with a bar chart of revenue by month.\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 5,
+        "vars": {
+          "request": "Build a sales pipeline dashboard showing deals grouped by stage with a bar chart of revenue by month.\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "3391",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:38 GMT",
+              "modal-function-call-id": "fc-01KWC8KSQ5AME9YR648XKSYXYK",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "09af351d-6a80-4a55-892d-eaccbd4b2cf6",
+        "latencyMs": 4519,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Create a document upload form for medical records with patient name, date of birth, and a file upload field.\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-9b487c7c0f4dc036",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# 📁 Medical Records Upload Portal\\n\\nPlease use this form to securely upload your medical documents for review. Ensure all files are clear and legible.\\n\\n:::callout(type=\\\"info\\\", title=\\\"Submission Privacy Notice\\\")\\nAll uploaded files are encrypted and handled in compliance with HIPAA regulations. Only authorized medical staff will have access to these documents.\\n:::\\n\\n<form>\\n  <fieldset>\\n    <legend>Patient Identification</legend>\\n    \\n    <group>\\n      <label for=\\\"patient-name\\\">Full Legal Name</label>\\n      <input type=\\\"text\\\" id=\\\"patient-name\\\" name=\\\"patient-name\\\" placeholder=\\\"John Doe\\\" required />\\n    </group>\\n\\n    <group>\\n      <label for=\\\"dob\\\">Date of Birth</label>\\n      <input type=\\\"date\\\" id=\\\"dob\\\" name=\\\"dob\\\" required />\\n    </group>\\n  </fieldset>\\n\\n  <fieldset>\\n    <legend>Document Upload</legend>\\n    \\n    <group>\\n      <label for=\\\"medical-files\\\">Select Files</label>\\n      <input type=\\\"file\\\" id=\\\"medical-files\\\" name=\\\"medical-files\\\" multiple required />\\n      <p class=\\\"help-text\\\">Accepted formats: PDF, JPG, PNG. 
Maximum file size: 10MB per file.</p>\\n    </group>\\n\\n    <group>\\n      <label for=\\\"reason\\\">Reason for Upload</label>\\n      <select id=\\\"reason\\\" name=\\\"reason\\\">\\n        <option value=\\\"routine\\\">Routine Check-up</option>\\n        <option value=\\\"new-patient\\\">New Patient Intake</option>\\n        <option value=\\\"specialist\\\">Specialist Referral</option>\\n        <option value=\\\"follow-up\\\">Follow-up Appointment</option>\\n        <option value=\\\"other\\\">Other</option>\\n      </select>\\n    </group>\\n  </fieldset>\\n\\n  <div class=\\\"form-actions\\\">\\n    <button type=\\\"submit\\\" variant=\\\"primary\\\">📤 Submit Medical Records</button>\\n    <button type=\\\"button\\\" variant=\\\"ghost\\\" onclick=\\\"window.history.back()\\\">Cancel</button>\\n  </div>\\n</form>\\n\\n<webhook url=\\\"/api/v1/medical/upload\\\" method=\\\"POST\\\" />\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 733,
+            "prompt": 229,
+            "completion": 504,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4519,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2867",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:44 GMT",
+                "modal-function-call-id": "fc-01KWC8M0ASAYX238Q94ZX1AMEY",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "File upload form calls generate_mdma",
+          "vars": {
+            "request": "Create a document upload form for medical records with patient name, date of birth, and a file upload field."
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 6,
+        "vars": {
+          "request": "Create a document upload form for medical records with patient name, date of birth, and a file upload field."
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2867",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:44 GMT",
+              "modal-function-call-id": "fc-01KWC8M0ASAYX238Q94ZX1AMEY",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly called generate_mdma tool",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs"
+              }
+            }
+          ]
+        },
+        "id": "12add697-491c-45e3-9439-0a3b5d1e9f69",
+        "latencyMs": 5712,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"I need a KYC form that collects full name, date of birth, national ID number, residential address, and a document upload for ID verification.\\n\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": [
+            {
+              "id": "chatcmpl-tool-b0677ddd693ca26d",
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "arguments": "{\"document\": \"# Know Your Customer (KYC) Verification Form\\n\\nPlease complete the form below to verify your identity. Ensure all information provided is accurate and matches your official documents.\\n\\n---\\n\\n## 👤 Personal Information\\n\\n:::callout info: Identity Requirements\\nVerification is required by law to prevent identity theft and fraud. Please ensure the document you upload is valid and not expired.\\n:::\\n\\n<form>\\n  <section>\\n    ### Basic Details\\n    \\n    **Full Name**\\n    Please provide your full name as it appears on your government-issued ID.\\n    <input type=\\\"text\\\" name=\\\"full_name\\\" label=\\\"Full Legal Name\\\" placeholder=\\\"e.g. Johnathan Doe\\\" required />\\n\\n    **Date of Birth**\\n    <input type=\\\"date\\\" name=\\\"dob\\\" label=\\\"Date of Birth\\\" required />\\n\\n    **National ID Number**\\n    Enter your government-issued identification number (SSN, Passport Number, etc.).\\n    <input type=\\\"text\\\" name=\\\"national_id\\\" label=\\\"National ID / Passport Number\\\" placeholder=\\\"e.g. 
123-456-789\\\" required />\\n  </section>\\n\\n  <section>\\n    ### Residential Address\\n\\n    **Street Address**\\n    <input type=\\\"text\\\" name=\\\"address_street\\\" label=\\\"Street Address\\\" placeholder=\\\"123 Main St, Apt 4B\\\" required />\\n\\n    **City**\\n    <input type=\\\"text\\\" name=\\\"address_city\\\" label=\\\"City\\\" required />\\n\\n    **State / Province**\\n    <input type=\\\"text\\\" name=\\\"address_state\\\" label=\\\"State / Province\\\" required />\\n\\n    **Postal Code**\\n    <input type=\\\"text\\\" name=\\\"address_zip\\\" label=\\\"Postal / Zip Code\\\" required />\\n\\n    **Country**\\n    <select name=\\\"address_country\\\" label=\\\"Country\\\" required>\\n      <option value=\\\"\\\">Select a country</option>\\n      <option value=\\\"us\\\">United States</option>\\n      <option value=\\\"uk\\\">United Kingdom</option>\\n      <option value=\\\"ca\\\">Canada</option>\\n      <option value=\\\"au\\\">Australia</option>\\n      <option value=\\\"other\\\">Other</option>\\n    </select>\\n  </section>\\n\\n  <section>\\n    ### Document Upload\\n\\n    **ID Document Verification**\\n    Please upload a high-quality photo or scan of your government-issued ID (Passport, Driver's License, or National ID card).\\n    \\n    <file_upload name=\\\"id_document\\\" label=\\\"Upload ID Document\\\" accept=\\\"image/*,.pdf\\\" required />\\n    \\n    :::callout warning: Upload Tips\\n    * Ensure the entire document is visible within the frame.\\n    * Make sure all text is legible and not obscured by glare.\\n    * Files must be in JPEG, PNG, or PDF format.\\n    :::\\n  </section>\\n\\n  <button type=\\\"submit\\\" variant=\\\"primary\\\">Submit Verification</button>\\n</form>\\n\\n---\\n\\n### 🛡️ Data Privacy & Consent\\nBy submitting this form, you acknowledge that you have read our [Privacy Policy] and consent to the processing of your personal data for identity verification purposes.\\n\\n<webhook 
url=\\\"https://api.yourdomain.com/v1/kyc/verify\\\" method=\\\"POST\\\" />\"}"
+              }
+            }
+          ],
+          "tokenUsage": {
+            "total": 964,
+            "prompt": 236,
+            "completion": 728,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 5712,
+          "finishReason": "tool_calls",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "3902",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:49 GMT",
+                "modal-function-call-id": "fc-01KWC8M4RZ1C4KCP70QMRZYFPZ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "KYC verification form calls generate_mdma",
+          "vars": {
+            "request": "I need a KYC form that collects full name, date of birth, national ID number, residential address, and a document upload for ID verification.\n"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs"
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 7,
+        "vars": {
+          "request": "I need a KYC form that collects full name, date of birth, national ID number, residential address, and a document upload for ID verification.\n"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "3902",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:49 GMT",
+              "modal-function-call-id": "fc-01KWC8M4RZ1C4KCP70QMRZYFPZ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "868a9bb7-3604-4641-ac81-a968c3a3b7c4",
+        "latencyMs": 2698,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"What kind of interactive documents can MDMA format represent?\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "The MDMA format is designed to go beyond static text by embedding interactive components that allow users to input data, trigger workflows, and visualize information directly within the document.\n\nHere are the primary types of interactive elements MDMA can represent:\n\n### 1. Data Entry & Collection\n*   **Forms:** Structured collections of input fields (text, selects, checkboxes, dates, etc.) used to capture user data.\n*   **Webhooks:** Buttons or form submissions that can send data to external APIs, enabling real-time integration with other software systems.\n\n### 2. Structured Data & Planning\n*   **Interactive Tables:** Sophisticated tables for data display that can include sorting, filtering, and specific cell actions.\n*   **Tasklists:** Checklists used for project management, tracking progress, and ensuring step-by-step completion of processes.\n\n### 3. Decision Making & Governance\n*   **Approval Gates:** Specialized UI components that require a formal \"Approve\" or \"Reject\" action, often used to lock in decisions or initiate official workflows.\n*   **Buttons:** Actionable elements used to trigger immediate commands or navigation.\n\n### 4. Analysis & Communication\n*   **Charts:** Dynamic visual representations of data (line, bar, pie, etc.) that make complex information easier to digest.\n*   **Callouts:** Specialized alert or info boxes used to highlight important caveats, warnings, or contextually relevant information.\n\nIf you would like to see an example of any of these in action, just let me know what you're working on!",
+          "tokenUsage": {
+            "total": 544,
+            "prompt": 218,
+            "completion": 326,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2698,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2201",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:55 GMT",
+                "modal-function-call-id": "fc-01KWC8MAD984DWYTZF35Z5JREH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Informational question does not call generate_mdma",
+          "vars": {
+            "request": "What kind of interactive documents can MDMA format represent?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 8,
+        "vars": {
+          "request": "What kind of interactive documents can MDMA format represent?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2201",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:55 GMT",
+              "modal-function-call-id": "fc-01KWC8MAD984DWYTZF35Z5JREH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "823bd9a9-3e50-4d38-bbb5-3b1e3d78e6ca",
+        "latencyMs": 5351,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Can you explain the difference between a form and a steps component in MDMA?\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "In MDMA, both components are used to guide users through processes, but they serve different structural and logical purposes. Here is the breakdown:\n\n### 1. The Form Component\nThe **Form** is a collection of input fields designed for **data entry**. Its primary goal is to capture information from the user and send it to a destination (via a webhook or a state update).\n\n*   **Purpose:** To gather specific, structured data (text, numbers, dates, selections, files).\n*   **Interaction Model:** The user sees all required fields at once (or in a single logical group) and submits the entire batch of data to complete the action.\n*   **Best Use Case:** Creating a new ticket, submitting an expense report, or updating a profile.\n*   **Logic:** Forms are usually \"single-shot.\" Once you click submit, the task for that specific input session is done.\n\n### 2. The Steps Component\nThe **Steps** component is a **navigation and progress indicator**. It is a structural element used to break a complex, long-running process into manageable, digestible chunks.\n\n*   **Purpose:** To provide visual feedback on \"where I am\" and \"how much is left\" in a complex workflow.\n*   **Interaction Model:** It tells the user they are in a multi-stage journey. 
As they complete one part of a process, the steps component updates to show progress (e.g., moving from Step 1 to Step 2).\n*   **Best Use Case:** An onboarding wizard, a complex multi-stage approval workflow, or a multi-step procurement process.\n*   **Logic:** Steps are \"continuous.\" They represent the state of a lifecycle rather than a single data capture event.\n\n---\n\n### Key Comparison Table\n\n| Feature | Form | Steps |\n| :--- | :--- | :--- |\n| **Primary Function** | Data Capture (Input) | Progress Tracking (Navigation) |\n| **User Focus** | \"What information do I need to provide?\" | \"How far along am I in this process?\" |\n| **Visual Output** | Input fields, checkboxes, dropdowns | A progress bar or breadcrumb-style stepper |\n| **Lifecycle** | Completed upon submission | Persists throughout the entire duration of a multi-stage task |\n| **Implementation** | Contains interactive inputs | Often *contains* or *wraps* several forms (one for each step) |\n\n### How they work together\nIn a professional MDMA document, you rarely choose one *over* the other; instead, you **nest forms inside steps**.\n\n**Example:** You are building a \"New Project Setup\" wizard:\n1.  You use a **Steps component** at the top to show: *Setup $\\rightarrow$ Budget $\\rightarrow$ Team $\\rightarrow$ Launch*.\n2.  On the **Setup** step, you render a **Form** to capture the project name and description.\n3.  Once the user submits the first form, the **Steps component** updates to show Step 1 is complete and Step 2 is active.\n4.  On the **Budget** step, you render a *new* **Form** to capture financial data.",
+          "tokenUsage": {
+            "total": 899,
+            "prompt": 223,
+            "completion": 676,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 5351,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "3515",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:38:57 GMT",
+                "modal-function-call-id": "fc-01KWC8MD2HK5PKHYDMDXZJYSPZ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Follow-up clarification does not call generate_mdma",
+          "vars": {
+            "request": "Can you explain the difference between a form and a steps component in MDMA?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 9,
+        "vars": {
+          "request": "Can you explain the difference between a form and a steps component in MDMA?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "3515",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:38:57 GMT",
+              "modal-function-call-id": "fc-01KWC8MD2HK5PKHYDMDXZJYSPZ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "6eb79019-43fc-4d8a-b9a2-9eefd87ca480",
+        "latencyMs": 879,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Thank you, the form looks great!\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "You're very welcome! I'm glad to hear the form meets your needs. \n\nIf you need any modifications—like adding more fields, creating a different type of document (like a dashboard or a tasklist), or setting up any new automation—just let me know. I'm here to help!",
+          "tokenUsage": {
+            "total": 281,
+            "prompt": 215,
+            "completion": 66,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 879,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "897",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:39:03 GMT",
+                "modal-function-call-id": "fc-01KWC8MJBS71ER6YSX7Q82AQEH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "User acknowledgement does not call generate_mdma",
+          "vars": {
+            "request": "Thank you, the form looks great!"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 10,
+        "vars": {
+          "request": "Thank you, the form looks great!"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "897",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:39:03 GMT",
+              "modal-function-call-id": "fc-01KWC8MJBS71ER6YSX7Q82AQEH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "2d0219b4-69ba-42e9-ae76-d62e48a4e679",
+        "latencyMs": 2300,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"How many components can I put in a single MDMA document?\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "There is no hard-coded limit on the number of components you can include in a single MDMA document. You can combine multiple forms, tables, tasklists, callouts, and buttons within one document to create complex, multi-step workflows or comprehensive dashboards.\n\nHowever, there are a few practical considerations to keep in mind for the best user experience:\n\n1.  **Complexity & Load Times:** While the system can handle many components, an extremely large document (e.g., dozens of complex forms and massive data tables) may impact the rendering speed or the responsiveness of the interface.\n2.  **Cognitive Load:** From a UX perspective, it's often better to break a very large process into smaller, modular MDMA documents rather than overwhelming the user with a single \"everything-at-once\" page. \n3.  **Token Limits:** Since MDMA documents are generated via text, extremely massive documents might hit the output token limit of the LLM during generation.\n\n**Best Practice:**\nIf you are building a massive enterprise workflow, I recommend a **modular approach**: use one MDMA document for the initial data entry, then use a `webhook` or `button` to trigger a second MDMA document for a different stage of the process (e.g., an Approval Gate).",
+          "tokenUsage": {
+            "total": 490,
+            "prompt": 220,
+            "completion": 270,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2300,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1886",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:39:03 GMT",
+                "modal-function-call-id": "fc-01KWC8MK73XR035178NASFQ7KM",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "MDMA capability question does not call generate_mdma",
+          "vars": {
+            "request": "How many components can I put in a single MDMA document?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 11,
+        "vars": {
+          "request": "How many components can I put in a single MDMA document?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1886",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:39:03 GMT",
+              "modal-function-call-id": "fc-01KWC8MK73XR035178NASFQ7KM",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "6c4e3a62-ae1b-4845-ac83-7d73eec9b47b",
+        "latencyMs": 837,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Can you remind me what fields were in the last form you created?\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "Since this is the start of our current conversation, I haven't created any forms for you yet! \n\nHowever, I'm ready whenever you are. If you tell me what you need to capture (e.g., an onboarding process, a bug report, a project intake, etc.), I can build a custom interactive MDMA document for you right now.",
+          "tokenUsage": {
+            "total": 297,
+            "prompt": 221,
+            "completion": 76,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 837,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "938",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:39:06 GMT",
+                "modal-function-call-id": "fc-01KWC8MNFMMEJYSTYVMKK6XZ9Q",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Follow-up about previous document does not call generate_mdma",
+          "vars": {
+            "request": "Can you remind me what fields were in the last form you created?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 12,
+        "vars": {
+          "request": "Can you remind me what fields were in the last form you created?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "938",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:39:06 GMT",
+              "modal-function-call-id": "fc-01KWC8MNFMMEJYSTYVMKK6XZ9Q",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "7502e0c5-082c-4b40-b9ba-3fa1eb887226",
+        "latencyMs": 4304,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Does MDMA support conditional field visibility based on other field values?\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "Yes, MDMA supports conditional logic for form fields. You can control the visibility of specific fields based on the value or state of other fields using a `trigger` and `action` pattern within the field's JSON structure.\n\n### How it works\n\nThe logic follows a simple **\"If [Field A] [Operator] [Value], then [Action] [Field B]\"** pattern.\n\n1.  **The Trigger:** You specify a field that the user interacts with.\n2.  **The Condition:** You define the operator (e.g., `equals`, `not_equals`, `contains`, `greater_than`) and the expected value.\n3.  **The Action:** You define what happens to the target field (usually `show` or `hide`).\n\n### Implementation Example\n\nHere is how you would define a field that only appears when a \"Type\" dropdown is set to \"Physical Goods\":\n\n```json\n{\n  \"type\": \"select\",\n  \"id\": \"shipping_address\",\n  \"label\": \"Shipping Address\",\n  \"required\": true,\n  \"visibility\": {\n    \"trigger_field\": \"order_type\",\n    \"condition\": {\n      \"operator\": \"equals\",\n      \"value\": \"physical_goods\"\n    },\n    \"action\": \"show\"\n  }\n}\n```\n\n### Key Concepts\n\n*   **`visibility` key:** This is added to any field you want to control.\n*   **Multi-field Logic:** If you need a field to show only when *both* Condition A and Condition B are met, you can provide an array of conditions. MDMA treats these as an `AND` operation by default.\n*   **Stateful UI:** When a condition is no longer met (e.g., a user changes a value from \"Physical\" to \"Digital\"), the hidden fields are automatically hidden and their validation state is cleared to prevent dirty data from being submitted.\n\n### Supported Operators\n\n| Operator | Description |\n| :--- | :--- |\n| `equals` | Field value matches exactly. |\n| `not_equals` | Field value does not match. |\n| `contains` | (For strings/arrays) Value is part of the selection. |\n| `greater_than` / `less_than` | (For numbers/dates) Comparison logic. 
|\n| `is_empty` / `is_not_empty` | Checks for the presence of a value. |",
+          "tokenUsage": {
+            "total": 752,
+            "prompt": 220,
+            "completion": 532,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 4304,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2675",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:39:07 GMT",
+                "modal-function-call-id": "fc-01KWC8MPC7NNCM86J9W735BAQX",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "General question about MDMA spec does not call generate_mdma",
+          "vars": {
+            "request": "Does MDMA support conditional field visibility based on other field values?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 13,
+        "vars": {
+          "request": "Does MDMA support conditional field visibility based on other field values?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2675",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:39:07 GMT",
+              "modal-function-call-id": "fc-01KWC8MPC7NNCM86J9W735BAQX",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Model correctly did not call generate_mdma for a non-document request",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/calls-generate-mdma.mjs",
+                "config": {
+                  "shouldCall": false
+                }
+              }
+            }
+          ]
+        },
+        "id": "86de816c-15c1-4695-aafb-4acc793150fd",
+        "latencyMs": 2881,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You are an assistant with a `generate_mdma` tool that produces interactive MDMA documents (forms, tables, charts, tasklists, callouts, approval-gates, buttons, webhooks). Call `generate_mdma` whenever the user asks you to create, build, design, or update an interactive document or UI. For greetings, questions about capabilities, explanations, or other conversational replies, respond normally and do NOT call the tool.\"},{\"role\":\"user\",\"content\":\"Hi, what can you help me with?\"}]",
+          "label": "own-model/prompt-guidance.mjs",
+          "config": {}
+        },
+        "promptId": "dad324a502f63223c6d6d3b28645b0d430643b6142b146bddcff50439ede05ab",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "Hello! I can help you with a wide range of tasks, from casual conversation and answering general questions to complex workflows and data management.\n\nHere is a breakdown of my core capabilities:\n\n### 🧠 Information & Content\n*   **Answering Questions:** I can explain complex topics, provide historical facts, or help with trivia.\n*   **Writing & Editing:** I can draft emails, essays, stories, code, or professional documents, and refine your existing text.\n*   **Summarization:** Paste a long article or document, and I can give you a concise summary or key bullet points.\n*   **Brainstorming:** I can generate ideas for marketing campaigns, gift ideas, project names, or creative writing.\n\n### 📊 Structured Documents & Tools (Interactive)\nOne of my specialized skills is creating **interactive MDMA documents**. If you need a structured way to track something or interact with data, I can generate:\n*   **Forms:** To collect specific information or data entry.\n*   **Tables & Charts:** To organize complex datasets or visualize trends.\n*   **Tasklists & Approval Gates:** To manage project workflows or decision-making processes.\n*   **Callouts & Webhooks:** For highlighting critical information or connecting to other services.\n\n### 💻 Technical & Analytical\n*   **Coding:** I can write, debug, and explain code in many different programming languages.\n*   **Mathematics & Logic:** I can help solve mathematical problems and logical puzzles.\n*   **Data Transformation:** I can convert text into formats like JSON, CSV, or Markdown tables.\n\n**Is there something specific you're working on right now that I can assist with?**",
+          "tokenUsage": {
+            "total": 567,
+            "prompt": 216,
+            "completion": 351,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2881,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "2287",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:39:11 GMT",
+                "modal-function-call-id": "fc-01KWC8MTJ1VGFTAK7NFJKDPPJ1",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "Greeting does not call generate_mdma",
+          "vars": {
+            "request": "Hi, what can you help me with?"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/calls-generate-mdma.mjs",
+              "config": {
+                "shouldCall": false
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 14,
+        "vars": {
+          "request": "Hi, what can you help me with?"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "2287",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:39:11 GMT",
+              "modal-function-call-id": "fc-01KWC8MTJ1VGFTAK7NFJKDPPJ1",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      }
+    ],
+    "stats": {
+      "successes": 15,
+      "failures": 0,
+      "errors": 0,
+      "tokenUsage": {
+        "prompt": 3369,
+        "completion": 6872,
+        "cached": 0,
+        "total": 10241,
+        "numRequests": 15,
+        "completionDetails": {
+          "reasoning": 0,
+          "acceptedPrediction": 0,
+          "rejectedPrediction": 0,
+          "cacheReadInputTokens": 0,
+          "cacheCreationInputTokens": 0
+        },
+        "assertions": {
+          "total": 0,
+          "prompt": 0,
+          "completion": 0,
+          "cached": 0,
+          "numRequests": 0,
+          "completionDetails": {
+            "reasoning": 0,
+            "acceptedPrediction": 0,
+            "rejectedPrediction": 0,
+            "cacheReadInputTokens": 0,
+            "cacheCreationInputTokens": 0
+          }
+        }
+      },
+      "durationMs": 58789,
+      "evaluationDurationMs": 58789
+    }
+  },
+  "config": {
+    "tags": {},
+    "description": "MDMA Agent Guidance Eval (tool-calling) — own model",
+    "prompts": [
+      "file:///Users/marcinsadowski/GIT/mr-mdma/evals/own-model/prompt-guidance.mjs"
+    ],
+    "providers": [
+      {
+        "id": "openai:chat:mdma-26b",
+        "config": {
+          "apiBaseUrl": "https://REDACTED.modal.run/v1",
+          "apiKey": "[REDACTED]",
+          "temperature": 1,
+          "max_tokens": 1024,
+          "min_p": 0.02,
+          "repetition_penalty": 1.1,
+          "chat_template_kwargs": {
+            "enable_thinking": false
+          },
+          "tool_choice": "auto",
+          "tools": [
+            {
+              "type": "function",
+              "function": {
+                "name": "generate_mdma",
+                "description": "Generate an MDMA Markdown document to present structured interactive content to the user (forms, tables, tasklists, approval gates, charts, callouts, buttons, webhooks).\n",
+                "parameters": {
+                  "type": "object",
+                  "properties": {
+                    "document": {
+                      "type": "string",
+                      "description": "The complete MDMA Markdown document."
+                    }
+                  },
+                  "required": [
+                    "document"
+                  ]
+                }
+              }
+            }
+          ]
+        }
+      }
+    ],
+    "tests": [
+      {
+        "description": "Contact form request calls generate_mdma",
+        "vars": {
+          "request": "Create a contact form with fields for full name, email address, and a message."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Onboarding flow request calls generate_mdma",
+        "vars": {
+          "request": "Build an employee onboarding flow with three steps: personal information, IT equipment setup, and policy acknowledgment.\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Bug report form calls generate_mdma",
+        "vars": {
+          "request": "I need a bug report form for my engineering team. It should capture the title, severity, steps to reproduce, and an optional file attachment.\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Customer satisfaction survey calls generate_mdma",
+        "vars": {
+          "request": "Create a customer satisfaction survey with rating questions for product quality, support experience, and likelihood to recommend.\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Expense approval workflow calls generate_mdma",
+        "vars": {
+          "request": "Make an expense reimbursement approval workflow: the employee submits amount and category, then a manager approves or rejects with a comment.\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Sales dashboard request calls generate_mdma",
+        "vars": {
+          "request": "Build a sales pipeline dashboard showing deals grouped by stage with a bar chart of revenue by month.\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "File upload form calls generate_mdma",
+        "vars": {
+          "request": "Create a document upload form for medical records with patient name, date of birth, and a file upload field."
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "KYC verification form calls generate_mdma",
+        "vars": {
+          "request": "I need a KYC form that collects full name, date of birth, national ID number, residential address, and a document upload for ID verification.\n"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs"
+          }
+        ]
+      },
+      {
+        "description": "Informational question does not call generate_mdma",
+        "vars": {
+          "request": "What kind of interactive documents can MDMA format represent?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      },
+      {
+        "description": "Follow-up clarification does not call generate_mdma",
+        "vars": {
+          "request": "Can you explain the difference between a form and a steps component in MDMA?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      },
+      {
+        "description": "User acknowledgement does not call generate_mdma",
+        "vars": {
+          "request": "Thank you, the form looks great!"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      },
+      {
+        "description": "MDMA capability question does not call generate_mdma",
+        "vars": {
+          "request": "How many components can I put in a single MDMA document?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      },
+      {
+        "description": "Follow-up about previous document does not call generate_mdma",
+        "vars": {
+          "request": "Can you remind me what fields were in the last form you created?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      },
+      {
+        "description": "General question about MDMA spec does not call generate_mdma",
+        "vars": {
+          "request": "Does MDMA support conditional field visibility based on other field values?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      },
+      {
+        "description": "Greeting does not call generate_mdma",
+        "vars": {
+          "request": "Hi, what can you help me with?"
+        },
+        "assert": [
+          {
+            "type": "javascript",
+            "value": "file://assertions/calls-generate-mdma.mjs",
+            "config": {
+              "shouldCall": false
+            }
+          }
+        ]
+      }
+    ],
+    "scenarios": [],
+    "env": {},
+    "outputPath": [
+      "own-model/results-guidance.json"
+    ],
+    "extensions": [],
+    "metadata": {},
+    "evaluateOptions": {}
+  },
+  "shareableUrl": null,
+  "metadata": {
+    "promptfooVersion": "0.121.9",
+    "nodeVersion": "v22.22.0",
+    "platform": "darwin",
+    "arch": "arm64",
+    "exportedAt": "2026-06-30T12:39:15.164Z",
+    "evaluationCreatedAt": "2026-06-30T12:38:16.261Z"
+  }
+}
\ No newline at end of file
diff --git a/evals/own-model/results.json b/evals/own-model/results.json
new file mode 100644
index 0000000..cc0b888
--- /dev/null
+++ b/evals/own-model/results.json
@@ -0,0 +1,12597 @@
+{
+  "evalId": "eval-IZE-2026-06-30T12:32:59",
+  "results": {
+    "version": 3,
+    "timestamp": "2026-06-30T12:32:59.728Z",
+    "prompts": [
+      {
+        "raw": "function ({ vars }) {\n  return [\n    { role: 'system', content: `{% raw %}${SYSTEM_PROMPT}{% endraw %}` },\n    { role: 'user', content: `{% raw %}${vars.request}{% endraw %}` },\n  ];\n}",
+        "label": "own-model/prompt.mjs",
+        "config": {},
+        "id": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "provider": "openai:chat:mdma-26b",
+        "metrics": {
+          "score": 95,
+          "testPassCount": 95,
+          "testFailCount": 0,
+          "testErrorCount": 0,
+          "assertPassCount": 95,
+          "assertFailCount": 0,
+          "totalLatencyMs": 129905,
+          "tokenUsage": {
+            "prompt": 64036,
+            "completion": 12789,
+            "cached": 0,
+            "total": 76825,
+            "numRequests": 95,
+            "completionDetails": {
+              "reasoning": 0,
+              "acceptedPrediction": 0,
+              "rejectedPrediction": 0,
+              "cacheReadInputTokens": 0,
+              "cacheCreationInputTokens": 0
+            },
+            "assertions": {
+              "total": 0,
+              "prompt": 0,
+              "completion": 0,
+              "cached": 0,
+              "numRequests": 0,
+              "completionDetails": {
+                "reasoning": 0,
+                "acceptedPrediction": 0,
+                "rejectedPrediction": 0,
+                "cacheReadInputTokens": 0,
+                "cacheCreationInputTokens": 0
+              }
+            }
+          },
+          "namedScores": {},
+          "namedScoresCount": {},
+          "namedScoreWeights": {},
+          "cost": 0
+        }
+      }
+    ],
+    "results": [
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "309b3dec-087d-4054-94b4-1be9982daa1d",
+        "latencyMs": 1480,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: contact-form\ntype: form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: message\n    type: textarea\n    label: Message\n    required: true\nonSubmit: contact-submitted\n```",
+          "tokenUsage": {
+            "total": 771,
+            "prompt": 669,
+            "completion": 102,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1480,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "990",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:32:59 GMT",
+                "modal-function-call-id": "fc-01KWC89F8CMXA5HR533A2DXSS8",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_contact_form_matching_blueprint",
+          "vars": {
+            "request": "form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: message\n    type: textarea\n    label: \"Message\"\n    required: true\nonSubmit: contact-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 0,
+        "vars": {
+          "request": "form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: message\n    type: textarea\n    label: \"Message\"\n    required: true\nonSubmit: contact-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "990",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:32:59 GMT",
+              "modal-function-call-id": "fc-01KWC89F8CMXA5HR533A2DXSS8",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "7c5134ba-ab60-4687-b77e-27fff45ffc51",
+        "latencyMs": 1517,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: employee-pii-form\ntype: form\nonSubmit: employee-pii-submitted\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    required: true\n    sensitive: true\n  - name: ssn\n    type: text\n    label: Social Security Number\n    required: true\n    sensitive: true\n```",
+          "tokenUsage": {
+            "total": 826,
+            "prompt": 683,
+            "completion": 143,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1517,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1127",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:01 GMT",
+                "modal-function-call-id": "fc-01KWC89GZN5G4ARJHTYW2RZQX1",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_marks_pii_fields_as_sensitive",
+          "vars": {
+            "request": "form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: employee-pii-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: employee-pii-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 1,
+        "vars": {
+          "request": "form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: employee-pii-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: employee-pii-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1127",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:01 GMT",
+              "modal-function-call-id": "fc-01KWC89GZN5G4ARJHTYW2RZQX1",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0311c608-5ab7-4672-ae3d-eeaa17b66463",
+        "latencyMs": 618,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#maintenance-warning(variant=warning)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: maintenance-warning\ntype: callout\nvariant: warning\ncontent: \"Scheduled system maintenance will occur this weekend. Please save your work frequently to prevent data loss.\"\n```",
+          "tokenUsage": {
+            "total": 695,
+            "prompt": 652,
+            "completion": 43,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 618,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "822",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:02 GMT",
+                "modal-function-call-id": "fc-01KWC89JAWTPEH8SZKRMBF18T9",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_includes_a_thinking_block_before_main_content",
+          "vars": {
+            "request": "callout#maintenance-warning(variant=warning)",
+            "expected_mdma": "```mdma\ntype: callout\nid: maintenance-warning\nvariant: warning\ntitle: \"Scheduled Maintenance\"\ncontent: \"Our systems will be undergoing maintenance this weekend from Saturday 10 PM to Sunday 6 AM EST.\"\ndismissible: true\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 2,
+        "vars": {
+          "request": "callout#maintenance-warning(variant=warning)",
+          "expected_mdma": "```mdma\ntype: callout\nid: maintenance-warning\nvariant: warning\ntitle: \"Scheduled Maintenance\"\ncontent: \"Our systems will be undergoing maintenance this weekend from Saturday 10 PM to Sunday 6 AM EST.\"\ndismissible: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "822",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:02 GMT",
+              "modal-function-call-id": "fc-01KWC89JAWTPEH8SZKRMBF18T9",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "50d05e48-e536-4d91-96fd-b230f02af224",
+        "latencyMs": 1143,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"button#submit-report-btn(text=\\\"Submit Report\\\", action=submit-report, variant=primary)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: submit-report-btn\ntype: button\ntext: Submit Report\nonAction: submit-report\n```",
+          "tokenUsage": {
+            "total": 694,
+            "prompt": 664,
+            "completion": 30,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1143,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "724",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:03 GMT",
+                "modal-function-call-id": "fc-01KWC89K1M9V9TBCBX8SXC05NV",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_does_not_wrap_output_in_markdown_fences",
+          "vars": {
+            "request": "button#submit-report-btn(text=\"Submit Report\", action=submit-report, variant=primary)",
+            "expected_mdma": "```mdma\ntype: button\nid: submit-report-btn\ntext: \"Submit Report\"\nvariant: primary\nonAction: submit-report\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 3,
+        "vars": {
+          "request": "button#submit-report-btn(text=\"Submit Report\", action=submit-report, variant=primary)",
+          "expected_mdma": "```mdma\ntype: button\nid: submit-report-btn\ntext: \"Submit Report\"\nvariant: primary\nonAction: submit-report\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "724",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:03 GMT",
+              "modal-function-call-id": "fc-01KWC89K1M9V9TBCBX8SXC05NV",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "979264da-35c9-45cc-be37-dd15ee5b797d",
+        "latencyMs": 1059,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: country-selector-form\ntype: form\nfields:\n  - name: country\n    type: select\n    label: Country\n    options:\n      - label: United States\n        value: us\n      - label: United Kingdom\n        value: uk\n      - label: Canada\n        value: ca\n      - label: Australia\n        value: au\n      - label: Germany\n        value: de\nonSubmit: country-form-submitted\n```",
+          "tokenUsage": {
+            "total": 778,
+            "prompt": 672,
+            "completion": 106,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1059,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1024",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:04 GMT",
+                "modal-function-call-id": "fc-01KWC89M4PGT3Z7KWB626DG2YH",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_select_field_has_options",
+          "vars": {
+            "request": "form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: country-selector-form\nfields:\n  - name: country\n    type: select\n    label: \"Country\"\n    required: true\n    options:\n      - label: \"United States\"\n        value: us\n      - label: \"United Kingdom\"\n        value: uk\n      - label: \"Canada\"\n        value: ca\n      - label: \"Australia\"\n        value: au\n      - label: \"Germany\"\n        value: de\nonSubmit: country-form-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 4,
+        "vars": {
+          "request": "form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: country-selector-form\nfields:\n  - name: country\n    type: select\n    label: \"Country\"\n    required: true\n    options:\n      - label: \"United States\"\n        value: us\n      - label: \"United Kingdom\"\n        value: uk\n      - label: \"Canada\"\n        value: ca\n      - label: \"Australia\"\n        value: au\n      - label: \"Germany\"\n        value: de\nonSubmit: country-form-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1024",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:04 GMT",
+              "modal-function-call-id": "fc-01KWC89M4PGT3Z7KWB626DG2YH",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "64209f59-0fd2-4164-926f-2e11cedbd420",
+        "latencyMs": 1572,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"table#languages-table\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: languages-table\ntype: table\ncolumns:\n  - key: language\n    header: Language\n  - key: speakers\n    header: Native Speakers (Est.)\n  - key: region\n    header: Primary Region\ndata:\n  - language: English\n    speakers: 1.5B\n    region: Global\n  - language: Mandarin Chinese\n    speakers: 1.1B\n    region: East Asia\n  - language: Spanish\n    speakers: 560M\n    region: Americas / Spain\n  - language: French\n    speakers: 300M\n    region: Europe / Africa\n  - language: Hindi\n    speakers: 610M\n    region: South Asia\n```",
+          "tokenUsage": {
+            "total": 813,
+            "prompt": 646,
+            "completion": 167,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1572,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1181",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:05 GMT",
+                "modal-function-call-id": "fc-01KWC89N6WBN6Z7VKBBMXWB15J",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_table_component_matching_blueprint",
+          "vars": {
+            "request": "table#languages-table",
+            "expected_mdma": "```mdma\ntype: table\nid: languages-table\nsortable: true\ncolumns:\n  - key: rank\n    header: \"Rank\"\n    sortable: true\n  - key: language\n    header: \"Language\"\n    sortable: true\n  - key: market-share\n    header: \"Market Share (%)\"\n    sortable: true\ndata:\n  - { rank: 1, language: \"Python\", market-share: 28.1 }\n  - { rank: 2, language: \"JavaScript\", market-share: 21.5 }\n  - { rank: 3, language: \"Java\", market-share: 15.3 }\n  - { rank: 4, language: \"C++\", market-share: 10.2 }\n  - { rank: 5, language: \"TypeScript\", market-share: 8.7 }\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 5,
+        "vars": {
+          "request": "table#languages-table",
+          "expected_mdma": "```mdma\ntype: table\nid: languages-table\nsortable: true\ncolumns:\n  - key: rank\n    header: \"Rank\"\n    sortable: true\n  - key: language\n    header: \"Language\"\n    sortable: true\n  - key: market-share\n    header: \"Market Share (%)\"\n    sortable: true\ndata:\n  - { rank: 1, language: \"Python\", market-share: 28.1 }\n  - { rank: 2, language: \"JavaScript\", market-share: 21.5 }\n  - { rank: 3, language: \"Java\", market-share: 15.3 }\n  - { rank: 4, language: \"C++\", market-share: 10.2 }\n  - { rank: 5, language: \"TypeScript\", market-share: 8.7 }\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1181",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:05 GMT",
+              "modal-function-call-id": "fc-01KWC89N6WBN6Z7VKBBMXWB15J",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0949bc7f-8938-4146-ad7d-4cb1351775ae",
+        "latencyMs": 1034,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#quarterly-sales(variant=bar)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: quarterly-sales\ntype: chart\nlabel: Quarterly Sales\nvariant: bar\ndata: |\n  Quarter,Revenue\n  Q1,45000\n  Q2,52000\n  Q3,48000\n  Q4,61000\n```",
+          "tokenUsage": {
+            "total": 726,
+            "prompt": 652,
+            "completion": 74,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1034,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "789",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:07 GMT",
+                "modal-function-call-id": "fc-01KWC89PRXPCZYANJENDN4B56H",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_bar_chart_matching_blueprint",
+          "vars": {
+            "request": "chart#quarterly-sales(variant=bar)",
+            "expected_mdma": "```mdma\ntype: chart\nid: quarterly-sales\nvariant: bar\nlabel: \"Quarterly Sales\"\ndata: |\n  Quarter, Sales\n  Q1, 120000\n  Q2, 145000\n  Q3, 98000\n  Q4, 167000\nxAxis: Quarter\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 6,
+        "vars": {
+          "request": "chart#quarterly-sales(variant=bar)",
+          "expected_mdma": "```mdma\ntype: chart\nid: quarterly-sales\nvariant: bar\nlabel: \"Quarterly Sales\"\ndata: |\n  Quarter, Sales\n  Q1, 120000\n  Q2, 145000\n  Q3, 98000\n  Q4, 167000\nxAxis: Quarter\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "789",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:07 GMT",
+              "modal-function-call-id": "fc-01KWC89PRXPCZYANJENDN4B56H",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "baaa4b14-2ab5-4313-a372-1cad77584c92",
+        "latencyMs": 534,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"approval-gate#document-approval\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: document-approval\ntype: approval-gate\ntitle: Final Document Approval\n```",
+          "tokenUsage": {
+            "total": 672,
+            "prompt": 648,
+            "completion": 24,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 534,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "717",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:08 GMT",
+                "modal-function-call-id": "fc-01KWC89QTYW9WAT5FAVBDRHR2C",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_an_approval_gate_matching_blueprint",
+          "vars": {
+            "request": "approval-gate#document-approval",
+            "expected_mdma": "```mdma\ntype: approval-gate\nid: document-approval\ntitle: \"Document Approval\"\ndescription: \"This document requires sign-off from 2 managers before the release can proceed.\"\nrequiredApprovers: 2\nonApprove: proceed-release\nonDeny: reject-release\nrequireReason: true\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 7,
+        "vars": {
+          "request": "approval-gate#document-approval",
+          "expected_mdma": "```mdma\ntype: approval-gate\nid: document-approval\ntitle: \"Document Approval\"\ndescription: \"This document requires sign-off from 2 managers before the release can proceed.\"\nrequiredApprovers: 2\nonApprove: proceed-release\nonDeny: reject-release\nrequireReason: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "717",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:08 GMT",
+              "modal-function-call-id": "fc-01KWC89QTYW9WAT5FAVBDRHR2C",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "8a01a921-3c47-41d4-8891-1781104ea180",
+        "latencyMs": 1538,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: severity-form\ntype: form\ntitle: Severity Report\nfields:\n  - name: title\n    type: text\n    label: Title\n    required: true\n  - name: reporter-email\n    type: email\n    label: Reporter Email\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: Severity\n    required: true\n    options:\n      - label: Critical\n        value: critical\n      - label: High\n        value: high\n      - label: Medium\n        value: medium\n      - label: Low\n        value: low\nonSubmit: triage-submitted\n```",
+          "tokenUsage": {
+            "total": 828,
+            "prompt": 679,
+            "completion": 149,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1538,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1187",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:09 GMT",
+                "modal-function-call-id": "fc-01KWC89RDJQTJKH8RDY9JDDFW5",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_an_incident_triage_severity_form",
+          "vars": {
+            "request": "form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: severity-form\nfields:\n  - name: title\n    type: text\n    label: \"Incident Title\"\n    required: true\n  - name: reporter-email\n    type: email\n    label: \"Reporter Email\"\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - label: Critical\n        value: critical\n      - label: High\n        value: high\n      - label: Medium\n        value: medium\n      - label: Low\n        value: low\nonSubmit: triage-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 8,
+        "vars": {
+          "request": "form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: severity-form\nfields:\n  - name: title\n    type: text\n    label: \"Incident Title\"\n    required: true\n  - name: reporter-email\n    type: email\n    label: \"Reporter Email\"\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - label: Critical\n        value: critical\n      - label: High\n        value: high\n      - label: Medium\n        value: medium\n      - label: Low\n        value: low\nonSubmit: triage-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1187",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:09 GMT",
+              "modal-function-call-id": "fc-01KWC89RDJQTJKH8RDY9JDDFW5",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "922c79c9-1b25-4653-9908-2d22a8f16569",
+        "latencyMs": 1103,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#settings-form[display-name*:t, email*^:e](action=settings-saved)\\ncallout#settings-saved(variant=success)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: settings-form\nonSubmit: settings-saved\nfields:\n  - name: display-name\n    type: text\n    label: Display Name\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n```\n\n```mdma\ntype: callout\nid: settings-saved\nvariant: success\ncontent: Your settings have been successfully updated.\n```",
+          "tokenUsage": {
+            "total": 784,
+            "prompt": 677,
+            "completion": 107,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1103,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1019",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:10 GMT",
+                "modal-function-call-id": "fc-01KWC89SYN6GPY6XDGDPNPA4ZY",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_all_component_ids_are_unique_and_kebab_case",
+          "vars": {
+            "request": "form#settings-form[display-name*:t, email*^:e](action=settings-saved)\ncallout#settings-saved(variant=success)",
+            "expected_mdma": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: display-name\n    type: text\n    label: \"Display Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\nonSubmit: settings-saved\n```\n\n```mdma\ntype: callout\nid: settings-saved\nvariant: success\ncontent: \"Your settings have been saved successfully.\"\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 9,
+        "vars": {
+          "request": "form#settings-form[display-name*:t, email*^:e](action=settings-saved)\ncallout#settings-saved(variant=success)",
+          "expected_mdma": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: display-name\n    type: text\n    label: \"Display Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\nonSubmit: settings-saved\n```\n\n```mdma\ntype: callout\nid: settings-saved\nvariant: success\ncontent: \"Your settings have been saved successfully.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1019",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:10 GMT",
+              "modal-function-call-id": "fc-01KWC89SYN6GPY6XDGDPNPA4ZY",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "29d4750e-ed56-4d5f-80eb-b319a69b06ab",
+        "latencyMs": 623,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#password-expiry(variant=warning)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: password-expiry\ntype: callout\nvariant: warning\ncontent: \"Your password is set to expire in 7 days. Please update it to avoid being locked out of your account.\"\n```",
+          "tokenUsage": {
+            "total": 700,
+            "prompt": 652,
+            "completion": 48,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 623,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "811",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:11 GMT",
+                "modal-function-call-id": "fc-01KWC89V2DG7HM2C4KJME65Q0B",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_warning_callout_matching_blueprint",
+          "vars": {
+            "request": "callout#password-expiry(variant=warning)",
+            "expected_mdma": "```mdma\ntype: callout\nid: password-expiry\nvariant: warning\ntitle: \"Password Expiring Soon\"\ncontent: \"Your password will expire in 3 days. Please update it to avoid losing access.\"\ndismissible: true\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 10,
+        "vars": {
+          "request": "callout#password-expiry(variant=warning)",
+          "expected_mdma": "```mdma\ntype: callout\nid: password-expiry\nvariant: warning\ntitle: \"Password Expiring Soon\"\ncontent: \"Your password will expire in 3 days. Please update it to avoid losing access.\"\ndismissible: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "811",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:11 GMT",
+              "modal-function-call-id": "fc-01KWC89V2DG7HM2C4KJME65Q0B",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "1a9d051e-8e87-4b09-a595-d7330729b7f7",
+        "latencyMs": 686,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#payment-error(variant=error)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: payment-error\ntype: callout\nvariant: error\ncontent: \"Payment failed. Please check your billing information and try again.\"\n```",
+          "tokenUsage": {
+            "total": 689,
+            "prompt": 652,
+            "completion": 37,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 686,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "774",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:12 GMT",
+                "modal-function-call-id": "fc-01KWC89VPQ9ETJA7R0FRGA93MS",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_an_error_callout_matching_blueprint",
+          "vars": {
+            "request": "callout#payment-error(variant=error)",
+            "expected_mdma": "```mdma\ntype: callout\nid: payment-error\nvariant: error\ntitle: \"Payment Processing Unavailable\"\ncontent: \"Payment processing is currently unavailable. Please try again later or contact support.\"\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 11,
+        "vars": {
+          "request": "callout#payment-error(variant=error)",
+          "expected_mdma": "```mdma\ntype: callout\nid: payment-error\nvariant: error\ntitle: \"Payment Processing Unavailable\"\ncontent: \"Payment processing is currently unavailable. Please try again later or contact support.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "774",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:12 GMT",
+              "modal-function-call-id": "fc-01KWC89VPQ9ETJA7R0FRGA93MS",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "c9ed1e84-6071-4039-83f3-bb08a8c09b31",
+        "latencyMs": 708,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#account-verified(variant=success)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: account-verified\ntype: callout\nvariant: success\ncontent: Your account has been successfully verified. You now have full access to all features.\n```",
+          "tokenUsage": {
+            "total": 692,
+            "prompt": 652,
+            "completion": 40,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 708,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "793",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:13 GMT",
+                "modal-function-call-id": "fc-01KWC89WDMS4D58YNXNQZCHY87",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_success_callout_matching_blueprint",
+          "vars": {
+            "request": "callout#account-verified(variant=success)",
+            "expected_mdma": "```mdma\ntype: callout\nid: account-verified\nvariant: success\ntitle: \"Account Verified\"\ncontent: \"Your account has been successfully verified. You now have full access to all features.\"\ndismissible: true\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 12,
+        "vars": {
+          "request": "callout#account-verified(variant=success)",
+          "expected_mdma": "```mdma\ntype: callout\nid: account-verified\nvariant: success\ntitle: \"Account Verified\"\ncontent: \"Your account has been successfully verified. You now have full access to all features.\"\ndismissible: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "793",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:13 GMT",
+              "modal-function-call-id": "fc-01KWC89WDMS4D58YNXNQZCHY87",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0e2a929f-47e5-415e-8875-f3d7c0ba9ea8",
+        "latencyMs": 870,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#browser-share(variant=pie)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: browser-share\ntype: chart\nvariant: pie\nlabel: Browser Market Share\ndata: |\n  Browser,Share\n  Chrome,65%\n  Safari,18%\n  Firefox,4%\n  Edge,4%\n  Others,8%\n```",
+          "tokenUsage": {
+            "total": 718,
+            "prompt": 651,
+            "completion": 67,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 870,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "808",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:13 GMT",
+                "modal-function-call-id": "fc-01KWC89X3CEGEZ2MNWSHA6VZ9B",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_pie_chart_matching_blueprint",
+          "vars": {
+            "request": "chart#browser-share(variant=pie)",
+            "expected_mdma": "```mdma\ntype: chart\nid: browser-share\nvariant: pie\nlabel: \"Browser Market Share\"\ndata: |\n  Browser, Share\n  Chrome, 65\n  Safari, 18\n  Firefox, 8\n  Edge, 5\n  Other, 4\nxAxis: Browser\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 13,
+        "vars": {
+          "request": "chart#browser-share(variant=pie)",
+          "expected_mdma": "```mdma\ntype: chart\nid: browser-share\nvariant: pie\nlabel: \"Browser Market Share\"\ndata: |\n  Browser, Share\n  Chrome, 65\n  Safari, 18\n  Firefox, 8\n  Edge, 5\n  Other, 4\nxAxis: Browser\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "808",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:13 GMT",
+              "modal-function-call-id": "fc-01KWC89X3CEGEZ2MNWSHA6VZ9B",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "07da7bba-ad67-4c84-8ce8-73289e3edbcb",
+        "latencyMs": 520,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"button#delete-account-btn(text=\\\"Delete Account\\\", action=delete-account, variant=danger)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: delete-account-btn\ntype: button\ntext: Delete Account\nonAction: delete-account\n```",
+          "tokenUsage": {
+            "total": 694,
+            "prompt": 664,
+            "completion": 30,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 520,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "727",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:14 GMT",
+                "modal-function-call-id": "fc-01KWC89XZEN625FR9B95XV3CHW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_button_has_a_confirmation_dialog_matching_blueprint",
+          "vars": {
+            "request": "button#delete-account-btn(text=\"Delete Account\", action=delete-account, variant=danger)",
+            "expected_mdma": "```mdma\ntype: button\nid: delete-account-btn\ntext: \"Delete Account\"\nvariant: danger\nonAction: delete-account\nconfirm:\n  title: \"Delete Account?\"\n  message: \"This action is permanent and cannot be undone. All your data will be deleted.\"\n  confirmText: \"Yes, Delete\"\n  cancelText: \"Cancel\"\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 14,
+        "vars": {
+          "request": "button#delete-account-btn(text=\"Delete Account\", action=delete-account, variant=danger)",
+          "expected_mdma": "```mdma\ntype: button\nid: delete-account-btn\ntext: \"Delete Account\"\nvariant: danger\nonAction: delete-account\nconfirm:\n  title: \"Delete Account?\"\n  message: \"This action is permanent and cannot be undone. All your data will be deleted.\"\n  confirmText: \"Yes, Delete\"\n  cancelText: \"Cancel\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "727",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:14 GMT",
+              "modal-function-call-id": "fc-01KWC89XZEN625FR9B95XV3CHW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "8705b038-dbe6-499d-bdc6-8929f585aa36",
+        "latencyMs": 946,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\\ncallout#ticket-submitted(variant=success)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: ticket-form\nfields:\n  - name: subject\n    type: text\n    label: Subject\n  - name: description\n    type: textarea\n    label: Description\nonSubmit: ticket-submitted\n```\n\n```mdma\ntype: callout\nid: ticket-submitted\nvariant: success\ncontent: Your ticket has been submitted successfully.\n```",
+          "tokenUsage": {
+            "total": 761,
+            "prompt": 673,
+            "completion": 88,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 946,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "955",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:15 GMT",
+                "modal-function-call-id": "fc-01KWC89YFE91RW8B9Z6SBMBWTC",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "[REDACTED]",
+          "vars": {
+            "request": "form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\ncallout#ticket-submitted(variant=success)",
+            "expected_mdma": "```mdma\ntype: form\nid: ticket-form\nfields:\n  - name: subject\n    type: text\n    label: \"Subject\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"Description\"\n    required: true\nonSubmit: ticket-submitted\n```\n\n```mdma\ntype: callout\nid: ticket-submitted\nvariant: success\ncontent: \"Your support ticket has been submitted. We'll get back to you shortly.\"\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 15,
+        "vars": {
+          "request": "form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\ncallout#ticket-submitted(variant=success)",
+          "expected_mdma": "```mdma\ntype: form\nid: ticket-form\nfields:\n  - name: subject\n    type: text\n    label: \"Subject\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"Description\"\n    required: true\nonSubmit: ticket-submitted\n```\n\n```mdma\ntype: callout\nid: ticket-submitted\nvariant: success\ncontent: \"Your support ticket has been submitted. We'll get back to you shortly.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "955",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:15 GMT",
+              "modal-function-call-id": "fc-01KWC89YFE91RW8B9Z6SBMBWTC",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "4fd8e52e-6852-45b3-8484-3dcfc0e5c1e7",
+        "latencyMs": 2009,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"table#employee-directory\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: employee-directory\ntype: table\ncolumns:\n  - key: name\n    header: Name\n  - key: department\n    header: Department\n  - key: role\n    header: Role\n  - key: email\n    header: Email\ndata:\n  - name: Alice Thompson\n    department: Engineering\n    role: Software Engineer\n    email: alice.t@company.com\n  - name: Bob Richards\n    department: Product\n    role: Product Manager\n    email: b.richards@company.com\n  - name: Charlie Davis\n    department: Design\n    role: UX Designer\n    email: charlie.d@company.com\n  - name: Diana Prince\n    department: Engineering\n    role: DevOps Lead\n    email: d.prince@company.com\n```",
+          "tokenUsage": {
+            "total": 831,
+            "prompt": 646,
+            "completion": 185,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2009,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1284",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:16 GMT",
+                "modal-function-call-id": "fc-01KWC89ZDSPGXSPFJM7XRXNAS4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "[REDACTED]",
+          "vars": {
+            "request": "table#employee-directory",
+            "expected_mdma": "```mdma\ntype: table\nid: employee-directory\nsortable: true\nfilterable: true\ncolumns:\n  - key: name\n    header: \"Name\"\n    sortable: true\n  - key: department\n    header: \"Department\"\n    sortable: true\n  - key: role\n    header: \"Role\"\n    sortable: true\n  - key: start-date\n    header: \"Start Date\"\n    sortable: true\ndata:\n  - { name: \"Alice Johnson\", department: \"Engineering\", role: \"Senior Developer\", start-date: \"2021-03-15\" }\n  - { name: \"Bob Smith\", department: \"Marketing\", role: \"Campaign Manager\", start-date: \"2022-07-01\" }\n  - { name: \"Carol Davis\", department: \"Engineering\", role: \"Tech Lead\", start-date: \"2020-01-10\" }\n  - { name: \"Dan Wilson\", department: \"Sales\", role: \"Account Executive\", start-date: \"2023-02-20\" }\n  - { name: \"Eve Martinez\", department: \"HR\", role: \"Recruiter\", start-date: \"2022-11-05\" }\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 16,
+        "vars": {
+          "request": "table#employee-directory",
+          "expected_mdma": "```mdma\ntype: table\nid: employee-directory\nsortable: true\nfilterable: true\ncolumns:\n  - key: name\n    header: \"Name\"\n    sortable: true\n  - key: department\n    header: \"Department\"\n    sortable: true\n  - key: role\n    header: \"Role\"\n    sortable: true\n  - key: start-date\n    header: \"Start Date\"\n    sortable: true\ndata:\n  - { name: \"Alice Johnson\", department: \"Engineering\", role: \"Senior Developer\", start-date: \"2021-03-15\" }\n  - { name: \"Bob Smith\", department: \"Marketing\", role: \"Campaign Manager\", start-date: \"2022-07-01\" }\n  - { name: \"Carol Davis\", department: \"Engineering\", role: \"Tech Lead\", start-date: \"2020-01-10\" }\n  - { name: \"Dan Wilson\", department: \"Sales\", role: \"Account Executive\", start-date: \"2023-02-20\" }\n  - { name: \"Eve Martinez\", department: \"HR\", role: \"Recruiter\", start-date: \"2022-11-05\" }\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1284",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:16 GMT",
+              "modal-function-call-id": "fc-01KWC89ZDSPGXSPFJM7XRXNAS4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "d84c53de-15e0-4020-9d61-7f9cca9a0d70",
+        "latencyMs": 1977,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: payment-form\ntype: form\nfields:\n  - name: cardholder-name\n    type: text\n    label: Cardholder Name\n    required: true\n    sensitive: true\n  - name: card-number\n    type: text\n    label: Card Number\n    required: true\n    sensitive: true\n  - name: expiry\n    type: text\n    label: Expiry Date (MM/YY)\n    required: true\n    sensitive: true\n  - name: cvv\n    type: text\n    label: CVV\n    required: true\n    sensitive: true\n  - name: billing-address\n    type: textarea\n    label: Billing Address\n    required: true\n    sensitive: true\nonSubmit: payment-submitted\n```\n\n```mdma\nid: payment-webhook\ntype: webhook\nurl: https://api.payments.example.com/process\ntrigger: payment-submitted\n```",
+          "tokenUsage": {
+            "total": 898,
+            "prompt": 688,
+            "completion": 210,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1977,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1364",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:18 GMT",
+                "modal-function-call-id": "fc-01KWC8A1DVQEEEVV1B46JFB5CF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_marks_financial_fields_as_sensitive",
+          "vars": {
+            "request": "form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: payment-form\nfields:\n  - name: cardholder-name\n    type: text\n    label: \"Cardholder Name\"\n    required: true\n  - name: card-number\n    type: text\n    label: \"Credit Card Number\"\n    required: true\n    sensitive: true\n  - name: expiry\n    type: text\n    label: \"Expiration Date\"\n    required: true\n    sensitive: true\n  - name: cvv\n    type: text\n    label: \"CVV\"\n    required: true\n    sensitive: true\n  - name: billing-address\n    type: textarea\n    label: \"Billing Address\"\n    required: true\n    sensitive: true\nonSubmit: payment-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 17,
+        "vars": {
+          "request": "form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: payment-form\nfields:\n  - name: cardholder-name\n    type: text\n    label: \"Cardholder Name\"\n    required: true\n  - name: card-number\n    type: text\n    label: \"Credit Card Number\"\n    required: true\n    sensitive: true\n  - name: expiry\n    type: text\n    label: \"Expiration Date\"\n    required: true\n    sensitive: true\n  - name: cvv\n    type: text\n    label: \"CVV\"\n    required: true\n    sensitive: true\n  - name: billing-address\n    type: textarea\n    label: \"Billing Address\"\n    required: true\n    sensitive: true\nonSubmit: payment-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1364",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:18 GMT",
+              "modal-function-call-id": "fc-01KWC8A1DVQEEEVV1B46JFB5CF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0cdce6b1-eb50-4385-99eb-73210d860208",
+        "latencyMs": 1252,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: user-registration\nonSubmit: registration-submitted\nfields:\n  - name: username\n    type: text\n    label: Username\n    required: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: password\n    type: text\n    label: Password\n    required: true\n    sensitive: true\n  - name: bio\n    type: textarea\n    label: Biography\n```",
+          "tokenUsage": {
+            "total": 788,
+            "prompt": 673,
+            "completion": 115,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1252,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1053",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:20 GMT",
+                "modal-function-call-id": "fc-01KWC8A3CTASV1K1J81HADMTB3",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_marks_mandatory_fields_as_required",
+          "vars": {
+            "request": "form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: user-registration\nfields:\n  - name: username\n    type: text\n    label: \"Username\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: password\n    type: text\n    label: \"Password\"\n    required: true\n    sensitive: true\n  - name: bio\n    type: textarea\n    label: \"Bio\"\nonSubmit: registration-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 18,
+        "vars": {
+          "request": "form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: user-registration\nfields:\n  - name: username\n    type: text\n    label: \"Username\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: password\n    type: text\n    label: \"Password\"\n    required: true\n    sensitive: true\n  - name: bio\n    type: textarea\n    label: \"Bio\"\nonSubmit: registration-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1053",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:20 GMT",
+              "modal-function-call-id": "fc-01KWC8A3CTASV1K1J81HADMTB3",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "9f1d9968-59c8-4580-90b3-71b2bfd06f88",
+        "latencyMs": 962,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#settings-form[notifications-enabled:c](action=notification-info)\\ncallout#notification-info(variant=info)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: notifications-enabled\n    type: checkbox\n    label: Enable Notifications\nonSubmit: notification-info\n```\n\n```mdma\ntype: callout\nid: notification-info\nvariant: info\ncontent: Notification settings have been updated.\n```",
+          "tokenUsage": {
+            "total": 745,
+            "prompt": 671,
+            "completion": 74,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 962,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "916",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:21 GMT",
+                "modal-function-call-id": "fc-01KWC8A4M1GQ17BN75Q5J9HHZF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_uses_binding_expressions_across_components",
+          "vars": {
+            "request": "form#settings-form[notifications-enabled:c](action=notification-info)\ncallout#notification-info(variant=info)",
+            "expected_mdma": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: notifications-enabled\n    type: checkbox\n    label: \"Enable Notifications\"\nonSubmit: notification-info\n```\n\n```mdma\ntype: callout\nid: notification-info\nvariant: info\ntitle: \"Notification Preferences\"\ncontent: \"You will receive email notifications for account activity, security alerts, and weekly digests.\"\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 19,
+        "vars": {
+          "request": "form#settings-form[notifications-enabled:c](action=notification-info)\ncallout#notification-info(variant=info)",
+          "expected_mdma": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: notifications-enabled\n    type: checkbox\n    label: \"Enable Notifications\"\nonSubmit: notification-info\n```\n\n```mdma\ntype: callout\nid: notification-info\nvariant: info\ntitle: \"Notification Preferences\"\ncontent: \"You will receive email notifications for account activity, security alerts, and weekly digests.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "916",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:21 GMT",
+              "modal-function-call-id": "fc-01KWC8A4M1GQ17BN75Q5J9HHZF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "ac6c2e28-ce02-4f31-8721-481636c470c2",
+        "latencyMs": 1661,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: personal-info-form\ntype: form\nonSubmit: info-submitted\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    sensitive: true\n  - name: dob\n    type: date\n    label: Date of Birth\n    sensitive: true\n  - name: ssn\n    type: text\n    label: SSN\n    required: true\n    sensitive: true\n```",
+          "tokenUsage": {
+            "total": 840,
+            "prompt": 684,
+            "completion": 156,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1661,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1159",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:22 GMT",
+                "modal-function-call-id": "fc-01KWC8A5MMDZDMTJVFN517YF1W",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "[REDACTED]",
+          "vars": {
+            "request": "form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: personal-info-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: info-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 20,
+        "vars": {
+          "request": "form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: personal-info-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: info-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1159",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:22 GMT",
+              "modal-function-call-id": "fc-01KWC8A5MMDZDMTJVFN517YF1W",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "788d510d-c936-4877-8bbe-44893df1b43b",
+        "latencyMs": 457,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"approval-gate#budget-approval\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: budget-approval\ntype: approval-gate\ntitle: Project Budget Approval\n```",
+          "tokenUsage": {
+            "total": 672,
+            "prompt": 648,
+            "completion": 24,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 457,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "715",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:24 GMT",
+                "modal-function-call-id": "fc-01KWC8A77GTR2F5EEGH94KNQVF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "[REDACTED]",
+          "vars": {
+            "request": "approval-gate#budget-approval",
+            "expected_mdma": "```mdma\ntype: approval-gate\nid: budget-approval\ntitle: \"Budget Approval\"\ndescription: \"All budget requests over $10,000 require approval from authorized personnel.\"\nrequiredApprovers: 3\nallowedRoles:\n  - finance-manager\n  - director\nonApprove: budget-approved\nonDeny: budget-denied\nrequireReason: true\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 21,
+        "vars": {
+          "request": "approval-gate#budget-approval",
+          "expected_mdma": "```mdma\ntype: approval-gate\nid: budget-approval\ntitle: \"Budget Approval\"\ndescription: \"All budget requests over $10,000 require approval from authorized personnel.\"\nrequiredApprovers: 3\nallowedRoles:\n  - finance-manager\n  - director\nonApprove: budget-approved\nonDeny: budget-denied\nrequireReason: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "715",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:24 GMT",
+              "modal-function-call-id": "fc-01KWC8A77GTR2F5EEGH94KNQVF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0f117d49-12bf-4c82-b91f-45f63d0c3bdb",
+        "latencyMs": 1843,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: patient-intake\ntype: form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: dob\n    type: date\n    label: Date of Birth\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: Phone Number\n    required: true\n    sensitive: true\n  - name: medical-record\n    type: text\n    label: Medical Record Number\n    required: true\n    sensitive: true\n  - name: medications\n    type: textarea\n    label: Current Medications\n  - name: allergies\n    type: textarea\n    label: Known Allergies\nonSubmit: patient-intake-submitted\n```",
+          "tokenUsage": {
+            "total": 902,
+            "prompt": 697,
+            "completion": 205,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1843,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1378",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:24 GMT",
+                "modal-function-call-id": "fc-01KWC8A7QYG917NV400Z2XY67N",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_marks_medical_and_personal_health_fields_as_sensitive",
+          "vars": {
+            "request": "form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: patient-intake\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: medical-record\n    type: text\n    label: \"Medical Record Number\"\n    required: true\n    sensitive: true\n  - name: medications\n    type: textarea\n    label: \"Current Medications\"\n    sensitive: true\n  - name: allergies\n    type: textarea\n    label: \"Known Allergies\"\n    sensitive: true\nonSubmit: patient-intake-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 22,
+        "vars": {
+          "request": "form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: patient-intake\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: medical-record\n    type: text\n    label: \"Medical Record Number\"\n    required: true\n    sensitive: true\n  - name: medications\n    type: textarea\n    label: \"Current Medications\"\n    sensitive: true\n  - name: allergies\n    type: textarea\n    label: \"Known Allergies\"\n    sensitive: true\nonSubmit: patient-intake-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1378",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:24 GMT",
+              "modal-function-call-id": "fc-01KWC8A7QYG917NV400Z2XY67N",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "000ff024-0b8f-4c76-8459-e8868e32fdf2",
+        "latencyMs": 2607,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#monthly-revenue(variant=bar)\\nchart#category-breakdown(variant=pie)\\ntable#region-summary\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: chart\nid: monthly-revenue\nlabel: Monthly Revenue\nvariant: bar\ndata: |\n  Month,Revenue\n  Jan,45000\n  Feb,52000\n  Mar,48000\n  Apr,61000\n  May,59000\n  Jun,72000\n```\n\n```mdma\ntype: chart\nid: category-breakdown\nlabel: Revenue by Category\nvariant: pie\ndata: |\n  Category,Amount\n  Software,45000\n  Hardware,32000\n  Services,28000\n  Subscriptions,15000\n```\n\n```mdma\ntype: table\nid: region-summary\ncolumns:\n  - key: region\n    header: Region\n  - key: sales\n    header: Sales\n  - key: growth\n    header: YoY Growth\ndata:\n  - region: North America\n    sales: 120000\n    growth: \"+12%\"\n  - region: EMEA\n    sales: 95000\n    growth: \"+8%\"\n  - region: APAC\n    sales: 88000\n    growth: \"+22%\"\n  - region: LATAM\n    sales: 34000\n    growth: \"-3%\"\n```",
+          "tokenUsage": {
+            "total": 982,
+            "prompt": 669,
+            "completion": 313,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2607,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1438",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:26 GMT",
+                "modal-function-call-id": "fc-01KWC8A9H4XMFAYVYX4C4WXQP9",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_multiple_chart_types_in_a_dashboard",
+          "vars": {
+            "request": "chart#monthly-revenue(variant=bar)\nchart#category-breakdown(variant=pie)\ntable#region-summary",
+            "expected_mdma": "```mdma\ntype: chart\nid: monthly-revenue\nvariant: bar\nlabel: \"Monthly Revenue\"\ndata: |\n  Month, Revenue\n  January, 85000\n  February, 92000\n  March, 78000\n  April, 105000\n  May, 99000\n  June, 115000\nxAxis: Month\n```\n\n```mdma\ntype: chart\nid: category-breakdown\nvariant: pie\nlabel: \"Revenue by Category\"\ndata: |\n  Category, Revenue\n  Electronics, 180000\n  Clothing, 120000\n  Food, 95000\n  Services, 79000\nxAxis: Category\n```\n\n```mdma\ntype: table\nid: region-summary\nsortable: true\ncolumns:\n  - key: region\n    header: \"Region\"\n    sortable: true\n  - key: revenue\n    header: \"Revenue ($)\"\n    sortable: true\n  - key: growth\n    header: \"Growth (%)\"\n    sortable: true\ndata:\n  - { region: \"North America\", revenue: 250000, growth: 12.5 }\n  - { region: \"Europe\", revenue: 180000, growth: 8.3 }\n  - { region: \"Asia Pacific\", revenue: 145000, growth: 22.1 }\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 23,
+        "vars": {
+          "request": "chart#monthly-revenue(variant=bar)\nchart#category-breakdown(variant=pie)\ntable#region-summary",
+          "expected_mdma": "```mdma\ntype: chart\nid: monthly-revenue\nvariant: bar\nlabel: \"Monthly Revenue\"\ndata: |\n  Month, Revenue\n  January, 85000\n  February, 92000\n  March, 78000\n  April, 105000\n  May, 99000\n  June, 115000\nxAxis: Month\n```\n\n```mdma\ntype: chart\nid: category-breakdown\nvariant: pie\nlabel: \"Revenue by Category\"\ndata: |\n  Category, Revenue\n  Electronics, 180000\n  Clothing, 120000\n  Food, 95000\n  Services, 79000\nxAxis: Category\n```\n\n```mdma\ntype: table\nid: region-summary\nsortable: true\ncolumns:\n  - key: region\n    header: \"Region\"\n    sortable: true\n  - key: revenue\n    header: \"Revenue ($)\"\n    sortable: true\n  - key: growth\n    header: \"Growth (%)\"\n    sortable: true\ndata:\n  - { region: \"North America\", revenue: 250000, growth: 12.5 }\n  - { region: \"Europe\", revenue: 180000, growth: 8.3 }\n  - { region: \"Asia Pacific\", revenue: 145000, growth: 22.1 }\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1438",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:26 GMT",
+              "modal-function-call-id": "fc-01KWC8A9H4XMFAYVYX4C4WXQP9",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "86cd2337-1578-490a-93fa-d5def277ca16",
+        "latencyMs": 1124,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#resume-form[full-name*:t, resume*:f](action=resume-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: resume-form\ntype: form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: resume\n    type: file\n    label: Upload Resume\n    required: true\nonSubmit: resume-submitted\n```\n\n```mdma\nid: resume-submitted\ntype: callout\nvariant: success\ncontent: Your resume has been submitted successfully for review.\n```",
+          "tokenUsage": {
+            "total": 772,
+            "prompt": 663,
+            "completion": 109,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1124,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1025",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:29 GMT",
+                "modal-function-call-id": "fc-01KWC8AC3DBY5JQVGXPAFVZGKR",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_generates_a_form_with_a_basic_file_upload_field",
+          "vars": {
+            "request": "form#resume-form[full-name*:t, resume*:f](action=resume-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: resume-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: resume\n    type: file\n    label: \"Resume\"\n    required: true\nonSubmit: resume-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 24,
+        "vars": {
+          "request": "form#resume-form[full-name*:t, resume*:f](action=resume-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: resume-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: resume\n    type: file\n    label: \"Resume\"\n    required: true\nonSubmit: resume-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1025",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:29 GMT",
+              "modal-function-call-id": "fc-01KWC8AC3DBY5JQVGXPAFVZGKR",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 1 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "3badd643-38b4-4e01-9211-4f895841aacd",
+        "latencyMs": 897,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: kyc-form\ntype: form\nfields:\n  - name: full-name\n    type: text\n    label: Full Name\n    sensitive: true\n  - name: passport-scan\n    type: file\n    label: Passport Scan\n    sensitive: true\nonSubmit: kyc-submitted\n```",
+          "tokenUsage": {
+            "total": 746,
+            "prompt": 671,
+            "completion": 75,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 897,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "869",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:30 GMT",
+                "modal-function-call-id": "fc-01KWC8AD7SRZ73P2FD1TS8H64G",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_marks_a_sensitive_file_upload_passport_as_sensitive",
+          "vars": {
+            "request": "form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: kyc-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n    sensitive: true\n  - name: passport-scan\n    type: file\n    label: \"Passport Scan\"\n    required: true\n    sensitive: true\nonSubmit: kyc-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 25,
+        "vars": {
+          "request": "form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: kyc-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n    sensitive: true\n  - name: passport-scan\n    type: file\n    label: \"Passport Scan\"\n    required: true\n    sensitive: true\nonSubmit: kyc-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "869",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:30 GMT",
+              "modal-function-call-id": "fc-01KWC8AD7SRZ73P2FD1TS8H64G",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "9a89279b-16e4-439f-bd4f-5a15e5e16bca",
+        "latencyMs": 1006,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: devcon-2026-registration\ntype: form\nfields:\n  - name: full_name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: Email Address\n    required: true\n    sensitive: true\nonSubmit: devcon-registration-submitted\n```",
+          "tokenUsage": {
+            "total": 764,
+            "prompt": 674,
+            "completion": 90,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1006,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "934",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:31 GMT",
+                "modal-function-call-id": "fc-01KWC8AE54KQAFXM0Y4ZKX9YDB",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "reg_preserves_a_specific_component_id_requested_by_the_user",
+          "vars": {
+            "request": "form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)",
+            "expected_mdma": "```mdma\ntype: form\nid: devcon-2026-registration\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\nonSubmit: devcon-registration-submitted\n```",
+            "domainId": "regression",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 26,
+        "vars": {
+          "request": "form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: devcon-2026-registration\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\nonSubmit: devcon-registration-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "934",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:31 GMT",
+              "modal-function-call-id": "fc-01KWC8AE54KQAFXM0Y4ZKX9YDB",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "ddcfe2ec-a83a-454e-80a8-05361fceef17",
+        "latencyMs": 1542,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#consent-notice(text=\\\"Consent to treatment\\\")\\nform#intake-form[full_name*:t, date_of_birth*^:d, email*^:e, visit_reason*:ta](action=submit-intake)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: consent-notice\ntype: callout\ncontent: Consent to treatment\nvariant: info\n```\n\n```mdma\nid: intake-form\ntype: form\nfields:\n  - name: full_name\n    label: Full Name\n    type: text\n    required: true\n    sensitive: true\n  - name: date_of_birth\n    label: Date of Birth\n    type: date\n    required: true\n    sensitive: true\n  - name: email\n    label: Email Address\n    type: email\n    required: true\n    sensitive: true\n  - name: visit_reason\n    label: Visit Reason\n    type: textarea\n    required: true\nonSubmit: submit-intake\n```",
+          "tokenUsage": {
+            "total": 862,
+            "prompt": 697,
+            "completion": 165,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1542,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1200",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:32 GMT",
+                "modal-function-call-id": "fc-01KWC8AF5G11E8ZWN16PK891EA",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_medical_intake_en",
+          "vars": {
+            "request": "callout#consent-notice(text=\"Consent to treatment\")\nform#intake-form[full_name*:t, date_of_birth*^:d, email*^:e, visit_reason*:ta](action=submit-intake)",
+            "expected_mdma": "```mdma\nid: consent-notice\ntype: callout\nvariant: info\ntitle: \"Consent to treatment\"\ncontent: \"By submitting this form you consent to be treated at this clinic and confirm the information is accurate.\"\ndismissible: false\n```\n\n```mdma\nid: intake-form\ntype: form\nonSubmit: submit-intake\nfields:\n  - name: full_name\n    type: text\n    label: \"Full name\"\n    required: true\n  - name: date_of_birth\n    type: date\n    label: \"Date of birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: visit_reason\n    type: textarea\n    label: \"Reason for visit\"\n    required: true\n```",
+            "domainId": "medical_intake_clinic",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 27,
+        "vars": {
+          "request": "callout#consent-notice(text=\"Consent to treatment\")\nform#intake-form[full_name*:t, date_of_birth*^:d, email*^:e, visit_reason*:ta](action=submit-intake)",
+          "expected_mdma": "```mdma\nid: consent-notice\ntype: callout\nvariant: info\ntitle: \"Consent to treatment\"\ncontent: \"By submitting this form you consent to be treated at this clinic and confirm the information is accurate.\"\ndismissible: false\n```\n\n```mdma\nid: intake-form\ntype: form\nonSubmit: submit-intake\nfields:\n  - name: full_name\n    type: text\n    label: \"Full name\"\n    required: true\n  - name: date_of_birth\n    type: date\n    label: \"Date of birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: visit_reason\n    type: textarea\n    label: \"Reason for visit\"\n    required: true\n```",
+          "domainId": "medical_intake_clinic",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1200",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:32 GMT",
+              "modal-function-call-id": "fc-01KWC8AF5G11E8ZWN16PK891EA",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "cf94adf6-c1bc-46ed-ab8c-7d8b5b0bb00b",
+        "latencyMs": 1758,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#refill-form[medication*:t, dosage*:t, pharmacy*:s{centrum|stare-miasto|dworzec}, last_fill:d](action=request-refill)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: refill-form\ntype: form\nfields:\n  - name: medication\n    type: text\n    label: Lek*\n    required: true\n  - name: dosage\n    type: text\n    label: Dawka*\n    required: true\n  - name: pharmacy\n    type: select\n    label: Apteka*\n    required: true\n    options:\n      - label: Centrum\n        value: centrum\n      - label: Stare Miasto\n        value: stare-miasto\n      - label: Dworzec\n        value: dworzec\n  - name: last_fill\n    type: date\n    label: Ostatnie wykupienie\nonSubmit: request-refill\n```",
+          "tokenUsage": {
+            "total": 856,
+            "prompt": 691,
+            "completion": 165,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1758,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1169",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:33 GMT",
+                "modal-function-call-id": "fc-01KWC8AGN6FRG4QYY0GAVZ6VP4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_prescription_refill_pl",
+          "vars": {
+            "request": "form#refill-form[medication*:t, dosage*:t, pharmacy*:s{centrum|stare-miasto|dworzec}, last_fill:d](action=request-refill)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: refill-form\ntype: form\nonSubmit: request-refill\nfields:\n  - name: medication\n    type: text\n    label: \"Nazwa leku\"\n    required: true\n  - name: dosage\n    type: text\n    label: \"Dawka\"\n    required: true\n  - name: pharmacy\n    type: select\n    label: \"Apteka\"\n    required: true\n    options:\n      - { label: \"Centrum\", value: \"centrum\" }\n      - { label: \"Stare Miasto\", value: \"stare-miasto\" }\n      - { label: \"Dworzec\", value: \"dworzec\" }\n  - name: last_fill\n    type: date\n    label: \"Data ostatniej realizacji\"\n```",
+            "domainId": "prescription_refill",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 28,
+        "vars": {
+          "request": "form#refill-form[medication*:t, dosage*:t, pharmacy*:s{centrum|stare-miasto|dworzec}, last_fill:d](action=request-refill)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: refill-form\ntype: form\nonSubmit: request-refill\nfields:\n  - name: medication\n    type: text\n    label: \"Nazwa leku\"\n    required: true\n  - name: dosage\n    type: text\n    label: \"Dawka\"\n    required: true\n  - name: pharmacy\n    type: select\n    label: \"Apteka\"\n    required: true\n    options:\n      - { label: \"Centrum\", value: \"centrum\" }\n      - { label: \"Stare Miasto\", value: \"stare-miasto\" }\n      - { label: \"Dworzec\", value: \"dworzec\" }\n  - name: last_fill\n    type: date\n    label: \"Data ostatniej realizacji\"\n```",
+          "domainId": "prescription_refill",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1169",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:33 GMT",
+              "modal-function-call-id": "fc-01KWC8AGN6FRG4QYY0GAVZ6VP4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "8f7ff6a6-9349-4557-b2cb-c35aa0043146",
+        "latencyMs": 1805,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#emergency-disclaimer(text=\\\"Not for emergencies\\\")\\nform#triage-form[main_symptom*:t, severity*:s{mild|moderate|severe}, duration_days*:n](action=submit-triage)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: emergency-disclaimer\ntype: callout\nvariant: error\ncontent: Not for emergencies\n```\n\n```mdma\nid: triage-form\ntype: form\nfields:\n  - name: main_symptom\n    type: text\n    label: Main Symptom\n    required: true\n  - name: severity\n    type: select\n    label: Severity\n    options:\n      - label: Mild\n        value: mild\n      - label: Moderate\n        value: moderate\n      - label: Severe\n        value: severe\n    required: true\n  - name: duration_days\n    type: number\n    label: Duration (Days)\n    required: true\nonSubmit: submit-triage\n```",
+          "tokenUsage": {
+            "total": 859,
+            "prompt": 694,
+            "completion": 165,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1805,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1215",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:35 GMT",
+                "modal-function-call-id": "fc-01KWC8AJD9H3RZBR6V39PANXT7",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_symptom_checker_en",
+          "vars": {
+            "request": "callout#emergency-disclaimer(text=\"Not for emergencies\")\nform#triage-form[main_symptom*:t, severity*:s{mild|moderate|severe}, duration_days*:n](action=submit-triage)",
+            "expected_mdma": "```mdma\nid: emergency-disclaimer\ntype: callout\nvariant: warning\ntitle: \"Not for emergencies\"\ncontent: \"If this is a medical emergency call your local emergency number now. This tool does not provide a diagnosis.\"\ndismissible: false\n```\n\n```mdma\nid: triage-form\ntype: form\nonSubmit: submit-triage\nfields:\n  - name: main_symptom\n    type: text\n    label: \"Main symptom\"\n    required: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - { label: \"Mild\", value: \"mild\" }\n      - { label: \"Moderate\", value: \"moderate\" }\n      - { label: \"Severe\", value: \"severe\" }\n  - name: duration_days\n    type: number\n    label: \"Days affected\"\n    required: true\n```",
+            "domainId": "symptom_checker_intake",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 29,
+        "vars": {
+          "request": "callout#emergency-disclaimer(text=\"Not for emergencies\")\nform#triage-form[main_symptom*:t, severity*:s{mild|moderate|severe}, duration_days*:n](action=submit-triage)",
+          "expected_mdma": "```mdma\nid: emergency-disclaimer\ntype: callout\nvariant: warning\ntitle: \"Not for emergencies\"\ncontent: \"If this is a medical emergency call your local emergency number now. This tool does not provide a diagnosis.\"\ndismissible: false\n```\n\n```mdma\nid: triage-form\ntype: form\nonSubmit: submit-triage\nfields:\n  - name: main_symptom\n    type: text\n    label: \"Main symptom\"\n    required: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - { label: \"Mild\", value: \"mild\" }\n      - { label: \"Moderate\", value: \"moderate\" }\n      - { label: \"Severe\", value: \"severe\" }\n  - name: duration_days\n    type: number\n    label: \"Days affected\"\n    required: true\n```",
+          "domainId": "symptom_checker_intake",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1215",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:35 GMT",
+              "modal-function-call-id": "fc-01KWC8AJD9H3RZBR6V39PANXT7",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "00b2e9d9-21c3-4fc5-9a49-eaf908dd4061",
+        "latencyMs": 1792,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#range-note(text=\\\"Reference ranges\\\")\\ntable#results-table\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: range-note\ntype: callout\ncontent: \"All results should be compared against the reference ranges provided by your laboratory.\"\nvariant: info\n```\n\n```mdma\nid: results-table\ntype: table\ncolumns:\n  - key: test_name\n    header: Test Name\n  - key: result\n    header: Result\n  - key: unit\n    header: Unit\n  - key: status\n    header: Status\ndata:\n  - test_name: Glucose, fasting\n    result: \"105\"\n    unit: mg/dL\n    status: High\n  - test_name: HbA1c\n    result: \"5.7\"\n    unit: \"%\"\n    status: Normal\n  - test_name: Creatinine\n    result: \"0.9\"\n    unit: mg/dL\n    status: Normal\n```",
+          "tokenUsage": {
+            "total": 859,
+            "prompt": 659,
+            "completion": 200,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1792,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1260",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:37 GMT",
+                "modal-function-call-id": "fc-01KWC8AM76NGY7FX9N7Y08VA7M",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_lab_results_table_en",
+          "vars": {
+            "request": "callout#range-note(text=\"Reference ranges\")\ntable#results-table",
+            "expected_mdma": "```mdma\nid: range-note\ntype: callout\nvariant: info\ntitle: \"Reference ranges\"\ncontent: \"Values outside the reference range are flagged. Discuss any concerns with your physician.\"\ndismissible: true\n```\n\n```mdma\nid: results-table\ntype: table\ncolumns:\n  - { key: test, header: \"Test\" }\n  - { key: value, header: \"Value\" }\n  - { key: range, header: \"Reference range\" }\n  - { key: flag, header: \"Flag\" }\ndata:\n  - { test: \"Hemoglobin\", value: \"13.8 g/dL\", range: \"13.5-17.5\", flag: \"normal\" }\n  - { test: \"Glucose\", value: \"112 mg/dL\", range: \"70-99\", flag: \"high\" }\n```",
+            "domainId": "lab_results_review",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 30,
+        "vars": {
+          "request": "callout#range-note(text=\"Reference ranges\")\ntable#results-table",
+          "expected_mdma": "```mdma\nid: range-note\ntype: callout\nvariant: info\ntitle: \"Reference ranges\"\ncontent: \"Values outside the reference range are flagged. Discuss any concerns with your physician.\"\ndismissible: true\n```\n\n```mdma\nid: results-table\ntype: table\ncolumns:\n  - { key: test, header: \"Test\" }\n  - { key: value, header: \"Value\" }\n  - { key: range, header: \"Reference range\" }\n  - { key: flag, header: \"Flag\" }\ndata:\n  - { test: \"Hemoglobin\", value: \"13.8 g/dL\", range: \"13.5-17.5\", flag: \"normal\" }\n  - { test: \"Glucose\", value: \"112 mg/dL\", range: \"70-99\", flag: \"high\" }\n```",
+          "domainId": "lab_results_review",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1260",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:37 GMT",
+              "modal-function-call-id": "fc-01KWC8AM76NGY7FX9N7Y08VA7M",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "8cfbfe23-c3d8-4bdb-b0c1-9fcce612c41c",
+        "latencyMs": 1159,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#glucose-trend(variant=line)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: glucose-trend\ntype: chart\nlabel: Glucose Trend\nvariant: line\ndata: |\n  Date,Glucose (mg/dL)\n  2023-10-01,95\n  2023-10-02,102\n  2023-10-03,98\n  2023-10-04,110\n  2023-10-05,105\n```",
+          "tokenUsage": {
+            "total": 768,
+            "prompt": 651,
+            "completion": 117,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1159,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "832",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:39 GMT",
+                "modal-function-call-id": "fc-01KWC8AP03Z5H27BC0P4Q795GF",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_lab_results_chart_en",
+          "vars": {
+            "request": "chart#glucose-trend(variant=line)",
+            "expected_mdma": "```mdma\nid: glucose-trend\ntype: chart\nvariant: line\ndata: \"{{labs.glucose_history}}\"\nxAxis: date\nyAxis: value\n```",
+            "domainId": "lab_results_review",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 31,
+        "vars": {
+          "request": "chart#glucose-trend(variant=line)",
+          "expected_mdma": "```mdma\nid: glucose-trend\ntype: chart\nvariant: line\ndata: \"{{labs.glucose_history}}\"\nxAxis: date\nyAxis: value\n```",
+          "domainId": "lab_results_review",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "832",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:39 GMT",
+              "modal-function-call-id": "fc-01KWC8AP03Z5H27BC0P4Q795GF",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "1e8222ac-d866-4619-8d1a-b8675741764e",
+        "latencyMs": 1706,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#cancel-policy(text=\\\"Odwołania\\\")\\nform#booking-form[specialty*:s{kardiolog|dermatolog|ortopeda}, preferred_date*:d, insurance_number*^:t](action=book-appointment)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: cancel-policy\ncontent: Odwołania\nvariant: info\n```\n\n```mdma\ntype: form\nid: booking-form\nonSubmit: book-appointment\nfields:\n  - name: specialty\n    type: select\n    label: Specjalizacja\n    required: true\n    options:\n      - label: \"Kardiolog\"\n        value: \"kardiolog\"\n      - label: \"Dermatolog\"\n        value: \"dermatolog\"\n      - label: \"Ortopeda\"\n        value: \"ortopeda\"\n  - name: preferred_date\n    type: date\n    label: Preferowana data\n    required: true\n  - name: insurance_number\n    type: text\n    label: Numer ubezpieczenia\n    required: true\n    sensitive: true\n```",
+          "tokenUsage": {
+            "total": 895,
+            "prompt": 703,
+            "completion": 192,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1706,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1282",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:40 GMT",
+                "modal-function-call-id": "fc-01KWC8AQ5WDHJA14S105C5J3JX",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_appointment_booking_pl",
+          "vars": {
+            "request": "callout#cancel-policy(text=\"Odwołania\")\nform#booking-form[specialty*:s{kardiolog|dermatolog|ortopeda}, preferred_date*:d, insurance_number*^:t](action=book-appointment)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: cancel-policy\ntype: callout\nvariant: info\ntitle: \"Odwołania\"\ncontent: \"Wizytę można bezpłatnie odwołać najpóźniej 24 godziny przed terminem.\"\ndismissible: true\n```\n\n```mdma\nid: booking-form\ntype: form\nonSubmit: book-appointment\nfields:\n  - name: specialty\n    type: select\n    label: \"Specjalizacja\"\n    required: true\n    options:\n      - { label: \"Kardiolog\", value: \"kardiolog\" }\n      - { label: \"Dermatolog\", value: \"dermatolog\" }\n      - { label: \"Ortopeda\", value: \"ortopeda\" }\n  - name: preferred_date\n    type: date\n    label: \"Preferowana data\"\n    required: true\n  - name: insurance_number\n    type: text\n    label: \"Numer ubezpieczenia\"\n    required: true\n    sensitive: true\n```",
+            "domainId": "appointment_booking",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 32,
+        "vars": {
+          "request": "callout#cancel-policy(text=\"Odwołania\")\nform#booking-form[specialty*:s{kardiolog|dermatolog|ortopeda}, preferred_date*:d, insurance_number*^:t](action=book-appointment)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: cancel-policy\ntype: callout\nvariant: info\ntitle: \"Odwołania\"\ncontent: \"Wizytę można bezpłatnie odwołać najpóźniej 24 godziny przed terminem.\"\ndismissible: true\n```\n\n```mdma\nid: booking-form\ntype: form\nonSubmit: book-appointment\nfields:\n  - name: specialty\n    type: select\n    label: \"Specjalizacja\"\n    required: true\n    options:\n      - { label: \"Kardiolog\", value: \"kardiolog\" }\n      - { label: \"Dermatolog\", value: \"dermatolog\" }\n      - { label: \"Ortopeda\", value: \"ortopeda\" }\n  - name: preferred_date\n    type: date\n    label: \"Preferowana data\"\n    required: true\n  - name: insurance_number\n    type: text\n    label: \"Numer ubezpieczenia\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "appointment_booking",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1282",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:40 GMT",
+              "modal-function-call-id": "fc-01KWC8AQ5WDHJA14S105C5J3JX",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "ec8d9db0-ccc8-417d-b310-3e350d336dab",
+        "latencyMs": 1423,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#vaccine-form[vaccine_type*:s{influenza|tetanus|covid-19}, date_administered*:d, batch_number*:t](action=save-vaccination)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: vaccine-form\ntype: form\nonSubmit: save-vaccination\nfields:\n  - name: vaccine_type\n    type: select\n    label: Vaccine Type\n    required: true\n    options:\n      - label: Influenza\n        value: influenza\n      - label: Tetanus\n        value: tetanus\n      - label: COVID-19\n        value: covid-19\n  - name: date_administered\n    type: date\n    label: Date Administered\n    required: true\n  - name: batch_number\n    type: text\n    label: Batch Number\n    required: true\n```",
+          "tokenUsage": {
+            "total": 829,
+            "prompt": 685,
+            "completion": 144,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1423,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1140",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:42 GMT",
+                "modal-function-call-id": "fc-01KWC8AS0AGWTYT52NACJ8XJ1S",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_vaccination_record_en",
+          "vars": {
+            "request": "form#vaccine-form[vaccine_type*:s{influenza|tetanus|covid-19}, date_administered*:d, batch_number*:t](action=save-vaccination)",
+            "expected_mdma": "```mdma\nid: vaccine-form\ntype: form\nonSubmit: save-vaccination\nfields:\n  - name: vaccine_type\n    type: select\n    label: \"Vaccine\"\n    required: true\n    options:\n      - { label: \"Influenza\", value: \"influenza\" }\n      - { label: \"Tetanus\", value: \"tetanus\" }\n      - { label: \"COVID-19\", value: \"covid-19\" }\n  - name: date_administered\n    type: date\n    label: \"Date administered\"\n    required: true\n  - name: batch_number\n    type: text\n    label: \"Batch number\"\n    required: true\n```",
+            "domainId": "vaccination_record",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 33,
+        "vars": {
+          "request": "form#vaccine-form[vaccine_type*:s{influenza|tetanus|covid-19}, date_administered*:d, batch_number*:t](action=save-vaccination)",
+          "expected_mdma": "```mdma\nid: vaccine-form\ntype: form\nonSubmit: save-vaccination\nfields:\n  - name: vaccine_type\n    type: select\n    label: \"Vaccine\"\n    required: true\n    options:\n      - { label: \"Influenza\", value: \"influenza\" }\n      - { label: \"Tetanus\", value: \"tetanus\" }\n      - { label: \"COVID-19\", value: \"covid-19\" }\n  - name: date_administered\n    type: date\n    label: \"Date administered\"\n    required: true\n  - name: batch_number\n    type: text\n    label: \"Batch number\"\n    required: true\n```",
+          "domainId": "vaccination_record",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1140",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:42 GMT",
+              "modal-function-call-id": "fc-01KWC8AS0AGWTYT52NACJ8XJ1S",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "d7cb5e3b-bebd-4d30-901c-7f46afea853b",
+        "latencyMs": 708,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#consent-info(text=\\\"Informed consent\\\")\\napproval-gate#investigator-signoff(text=\\\"Investigator sign-off\\\")\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: consent-info\ntype: callout\nvariant: info\ncontent: Informed consent\n```\n\n```mdma\nid: investigator-signoff\ntype: approval-gate\ntitle: Investigator sign-off\n```",
+          "tokenUsage": {
+            "total": 725,
+            "prompt": 673,
+            "completion": 52,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 708,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "809",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:43 GMT",
+                "modal-function-call-id": "fc-01KWC8ATD0ZVD7BJH3X79NYZX5",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_clinical_trial_consent_en",
+          "vars": {
+            "request": "callout#consent-info(text=\"Informed consent\")\napproval-gate#investigator-signoff(text=\"Investigator sign-off\")",
+            "expected_mdma": "```mdma\nid: consent-info\ntype: callout\nvariant: warning\ntitle: \"Informed consent\"\ncontent: \"Read the full study information sheet. Participation is voluntary and you may withdraw at any time.\"\ndismissible: false\n```\n\n```mdma\nid: investigator-signoff\ntype: approval-gate\ntitle: \"Investigator sign-off\"\ndescription: \"A principal investigator must confirm eligibility before enrollment.\"\nrequiredApprovers: 1\nallowedRoles: [investigator]\nonApprove: enroll-participant\nonDeny: decline-enrollment\nrequireReason: true\n```",
+            "domainId": "clinical_trial_consent",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 34,
+        "vars": {
+          "request": "callout#consent-info(text=\"Informed consent\")\napproval-gate#investigator-signoff(text=\"Investigator sign-off\")",
+          "expected_mdma": "```mdma\nid: consent-info\ntype: callout\nvariant: warning\ntitle: \"Informed consent\"\ncontent: \"Read the full study information sheet. Participation is voluntary and you may withdraw at any time.\"\ndismissible: false\n```\n\n```mdma\nid: investigator-signoff\ntype: approval-gate\ntitle: \"Investigator sign-off\"\ndescription: \"A principal investigator must confirm eligibility before enrollment.\"\nrequiredApprovers: 1\nallowedRoles: [investigator]\nonApprove: enroll-participant\nonDeny: decline-enrollment\nrequireReason: true\n```",
+          "domainId": "clinical_trial_consent",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "809",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:43 GMT",
+              "modal-function-call-id": "fc-01KWC8ATD0ZVD7BJH3X79NYZX5",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "848de53b-10e8-4296-b227-b10ee41f0d29",
+        "latencyMs": 1779,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#followup-form[pain_scale*:s{0|3|7}, adherence:c, notes:ta](action=submit-followup)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: followup-form\ntype: form\ntitle: Formularz Kontrolny\nfields:\n  - name: pain_scale\n    type: select\n    label: Skala bólu\n    required: true\n    options:\n      - label: \"0\"\n        value: \"0\"\n      - label: \"3\"\n        value: \"3\"\n      - label: \"7\"\n        value: \"7\"\n  - name: adherence\n    type: checkbox\n    label: Przestrzeganie zaleceń\n  - name: notes\n    type: textarea\n    label: Dodatkowe uwagi\nonSubmit: submit-followup\n```\n\n```mdma\nid: submit-followup\ntype: button\ntext: Wyślij dane\nonAction: submit-followup\n```",
+          "tokenUsage": {
+            "total": 865,
+            "prompt": 680,
+            "completion": 185,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1779,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1207",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:44 GMT",
+                "modal-function-call-id": "fc-01KWC8AV550K1VJRW5Q3CN2914",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_telehealth_followup_pl",
+          "vars": {
+            "request": "form#followup-form[pain_scale*:s{0|3|7}, adherence:c, notes:ta](action=submit-followup)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: followup-form\ntype: form\nonSubmit: submit-followup\nfields:\n  - name: pain_scale\n    type: select\n    label: \"Skala bólu\"\n    required: true\n    options:\n      - { label: \"Brak\", value: \"0\" }\n      - { label: \"Łagodny\", value: \"3\" }\n      - { label: \"Silny\", value: \"7\" }\n  - name: adherence\n    type: checkbox\n    label: \"Przyjmuję leki zgodnie z zaleceniami\"\n  - name: notes\n    type: textarea\n    label: \"Uwagi\"\n```",
+            "domainId": "telehealth_followup",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 35,
+        "vars": {
+          "request": "form#followup-form[pain_scale*:s{0|3|7}, adherence:c, notes:ta](action=submit-followup)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: followup-form\ntype: form\nonSubmit: submit-followup\nfields:\n  - name: pain_scale\n    type: select\n    label: \"Skala bólu\"\n    required: true\n    options:\n      - { label: \"Brak\", value: \"0\" }\n      - { label: \"Łagodny\", value: \"3\" }\n      - { label: \"Silny\", value: \"7\" }\n  - name: adherence\n    type: checkbox\n    label: \"Przyjmuję leki zgodnie z zaleceniami\"\n  - name: notes\n    type: textarea\n    label: \"Uwagi\"\n```",
+          "domainId": "telehealth_followup",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1207",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:44 GMT",
+              "modal-function-call-id": "fc-01KWC8AV550K1VJRW5Q3CN2914",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "967e02ef-92b9-4410-8a62-7a7f9f8d9ef4",
+        "latencyMs": 1376,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#kyc-notice(text=\\\"Identity verification\\\")\\nform#kyc-form[legal_name*:t, national_id*^:t, id_document*^:f](action=submit-kyc)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: kyc-notice\ntype: callout\ncontent: Identity verification\nvariant: info\n```\n\n```mdma\nid: kyc-form\ntype: form\nonSubmit: submit-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: Legal Name\n    required: true\n    sensitive: true\n  - name: national_id\n    type: text\n    label: National ID\n    required: true\n    sensitive: true\n  - name: id_document\n    type: file\n    label: Identity Document\n    required: true\n    sensitive: true\n```",
+          "tokenUsage": {
+            "total": 832,
+            "prompt": 691,
+            "completion": 141,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1376,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1109",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:46 GMT",
+                "modal-function-call-id": "fc-01KWC8AWXMZYRA2PJ45ZCF2PGE",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_kyc_basic_en",
+          "vars": {
+            "request": "callout#kyc-notice(text=\"Identity verification\")\nform#kyc-form[legal_name*:t, national_id*^:t, id_document*^:f](action=submit-kyc)",
+            "expected_mdma": "```mdma\nid: kyc-notice\ntype: callout\nvariant: info\ntitle: \"Identity verification\"\ncontent: \"Your national ID and document are stored securely and used only for compliance review.\"\ndismissible: false\n```\n\n```mdma\nid: kyc-form\ntype: form\nonSubmit: submit-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: \"Legal name\"\n    required: true\n  - name: national_id\n    type: text\n    label: \"National ID number\"\n    required: true\n    sensitive: true\n  - name: id_document\n    type: file\n    label: \"Photo ID upload\"\n    required: true\n    sensitive: true\n```",
+            "domainId": "kyc_basic",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 36,
+        "vars": {
+          "request": "callout#kyc-notice(text=\"Identity verification\")\nform#kyc-form[legal_name*:t, national_id*^:t, id_document*^:f](action=submit-kyc)",
+          "expected_mdma": "```mdma\nid: kyc-notice\ntype: callout\nvariant: info\ntitle: \"Identity verification\"\ncontent: \"Your national ID and document are stored securely and used only for compliance review.\"\ndismissible: false\n```\n\n```mdma\nid: kyc-form\ntype: form\nonSubmit: submit-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: \"Legal name\"\n    required: true\n  - name: national_id\n    type: text\n    label: \"National ID number\"\n    required: true\n    sensitive: true\n  - name: id_document\n    type: file\n    label: \"Photo ID upload\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "kyc_basic",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1109",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:46 GMT",
+              "modal-function-call-id": "fc-01KWC8AWXMZYRA2PJ45ZCF2PGE",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "354a6319-d0c2-4940-aa10-b7dda8daeae5",
+        "latencyMs": 1833,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#apr-disclosure(text=\\\"RRSO\\\")\\nform#loan-form[amount*:n, monthly_income*^:n, purpose*:s{konsolidacja|remont|inny}](action=submit-loan)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: apr-disclosure\ntype: callout\nvariant: info\ncontent: Szacowany RRSO wynosi 8,5%.\n```\n\n```mdma\nid: loan-form\ntype: form\nonSubmit: submit-loan\nfields:\n  - name: amount\n    type: number\n    label: Kwota kredytu\n    required: true\n  - name: monthly_income\n    type: number\n    label: Miesięczny dochód netto\n    required: true\n  - name: purpose\n    type: select\n    label: Cel kredytu\n    required: true\n    options:\n      - label: \"Konsolidacja\"\n        value: \"konsolidacja\"\n      - label: \"Remont\"\n        value: \"remont\"\n      - label: \"Inny\"\n        value: \"inny\"\n```",
+          "tokenUsage": {
+            "total": 890,
+            "prompt": 698,
+            "completion": 192,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1833,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1254",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:47 GMT",
+                "modal-function-call-id": "fc-01KWC8AYA5WK4MF7J97PXX6Z06",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_loan_application_pl",
+          "vars": {
+            "request": "callout#apr-disclosure(text=\"RRSO\")\nform#loan-form[amount*:n, monthly_income*^:n, purpose*:s{konsolidacja|remont|inny}](action=submit-loan)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: apr-disclosure\ntype: callout\nvariant: info\ntitle: \"RRSO\"\ncontent: \"Rzeczywista roczna stopa oprocentowania zależy od oceny zdolności kredytowej i zostanie przedstawiona przed podpisaniem umowy.\"\ndismissible: false\n```\n\n```mdma\nid: loan-form\ntype: form\nonSubmit: submit-loan\nfields:\n  - name: amount\n    type: number\n    label: \"Kwota pożyczki\"\n    required: true\n  - name: monthly_income\n    type: number\n    label: \"Miesięczny dochód\"\n    required: true\n    sensitive: true\n  - name: purpose\n    type: select\n    label: \"Cel\"\n    required: true\n    options:\n      - { label: \"Konsolidacja\", value: \"konsolidacja\" }\n      - { label: \"Remont\", value: \"remont\" }\n      - { label: \"Inny\", value: \"inny\" }\n```",
+            "domainId": "loan_application",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 37,
+        "vars": {
+          "request": "callout#apr-disclosure(text=\"RRSO\")\nform#loan-form[amount*:n, monthly_income*^:n, purpose*:s{konsolidacja|remont|inny}](action=submit-loan)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: apr-disclosure\ntype: callout\nvariant: info\ntitle: \"RRSO\"\ncontent: \"Rzeczywista roczna stopa oprocentowania zależy od oceny zdolności kredytowej i zostanie przedstawiona przed podpisaniem umowy.\"\ndismissible: false\n```\n\n```mdma\nid: loan-form\ntype: form\nonSubmit: submit-loan\nfields:\n  - name: amount\n    type: number\n    label: \"Kwota pożyczki\"\n    required: true\n  - name: monthly_income\n    type: number\n    label: \"Miesięczny dochód\"\n    required: true\n    sensitive: true\n  - name: purpose\n    type: select\n    label: \"Cel\"\n    required: true\n    options:\n      - { label: \"Konsolidacja\", value: \"konsolidacja\" }\n      - { label: \"Remont\", value: \"remont\" }\n      - { label: \"Inny\", value: \"inny\" }\n```",
+          "domainId": "loan_application",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1254",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:47 GMT",
+              "modal-function-call-id": "fc-01KWC8AYA5WK4MF7J97PXX6Z06",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "1d60115f-e997-4bfe-aec7-9bfac454526e",
+        "latencyMs": 1969,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#expense-form[category*:s{travel|meals|software}, total*:n](action=submit-expense)\\ntable#line-items\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: expense-form\ntype: form\nfields:\n  - name: category\n    type: select\n    label: Category\n    options:\n      - label: Travel\n        value: travel\n      - label: Meals\n        value: meals\n      - label: Software\n        value: software\n  - name: total\n    type: number\n    label: Total Amount\nonSubmit: submit-expense\n```\n\n```mdma\nid: line-items\ntype: table\ncolumns:\n  - key: description\n    header: Description\n  - key: amount\n    header: Amount\n  - key: date\n    header: Date\ndata:\n  - description: Flight to New York\n    amount: 450.00\n    date: '2023-10-01'\n  - description: Client Dinner\n    amount: 120.50\n    date: '2023-10-02'\n```",
+          "tokenUsage": {
+            "total": 893,
+            "prompt": 673,
+            "completion": 220,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1969,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1316",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:49 GMT",
+                "modal-function-call-id": "fc-01KWC8B04S1DY3BYVK4NXKG1VW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_expense_report_en",
+          "vars": {
+            "request": "form#expense-form[category*:s{travel|meals|software}, total*:n](action=submit-expense)\ntable#line-items",
+            "expected_mdma": "```mdma\nid: expense-form\ntype: form\nonSubmit: submit-expense\nfields:\n  - name: category\n    type: select\n    label: \"Category\"\n    required: true\n    options:\n      - { label: \"Travel\", value: \"travel\" }\n      - { label: \"Meals\", value: \"meals\" }\n      - { label: \"Software\", value: \"software\" }\n  - name: total\n    type: number\n    label: \"Total amount\"\n    required: true\n```\n\n```mdma\nid: line-items\ntype: table\ncolumns:\n  - { key: item, header: \"Item\" }\n  - { key: date, header: \"Date\" }\n  - { key: amount, header: \"Amount\" }\ndata:\n  - { item: \"Flight\", date: \"2026-05-02\", amount: 320 }\n  - { item: \"Hotel\", date: \"2026-05-03\", amount: 180 }\n```",
+            "domainId": "expense_report",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 38,
+        "vars": {
+          "request": "form#expense-form[category*:s{travel|meals|software}, total*:n](action=submit-expense)\ntable#line-items",
+          "expected_mdma": "```mdma\nid: expense-form\ntype: form\nonSubmit: submit-expense\nfields:\n  - name: category\n    type: select\n    label: \"Category\"\n    required: true\n    options:\n      - { label: \"Travel\", value: \"travel\" }\n      - { label: \"Meals\", value: \"meals\" }\n      - { label: \"Software\", value: \"software\" }\n  - name: total\n    type: number\n    label: \"Total amount\"\n    required: true\n```\n\n```mdma\nid: line-items\ntype: table\ncolumns:\n  - { key: item, header: \"Item\" }\n  - { key: date, header: \"Date\" }\n  - { key: amount, header: \"Amount\" }\ndata:\n  - { item: \"Flight\", date: \"2026-05-02\", amount: 320 }\n  - { item: \"Hotel\", date: \"2026-05-03\", amount: 180 }\n```",
+          "domainId": "expense_report",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1316",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:49 GMT",
+              "modal-function-call-id": "fc-01KWC8B04S1DY3BYVK4NXKG1VW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0b7beb2e-ac88-4541-96e0-988cdb788c3e",
+        "latencyMs": 723,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"approval-gate#dual-approval(text=\\\"Dual approval\\\")\\nwebhook#payment-rail\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: approval-gate\nid: dual-approval\ntitle: Dual approval\n```\n\n```mdma\ntype: webhook\nid: payment-rail\nurl: https://api.payment-rail.io/v1/process\ntrigger: dual-approval\n```",
+          "tokenUsage": {
+            "total": 722,
+            "prompt": 660,
+            "completion": 62,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 723,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "821",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:51 GMT",
+                "modal-function-call-id": "fc-01KWC8B22PC5M3VN9MGVFDS172",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_wire_transfer_en",
+          "vars": {
+            "request": "approval-gate#dual-approval(text=\"Dual approval\")\nwebhook#payment-rail",
+            "expected_mdma": "```mdma\nid: dual-approval\ntype: approval-gate\ntitle: \"Dual approval\"\ndescription: \"Two authorized signers must approve outbound international transfers.\"\nrequiredApprovers: 2\nallowedRoles: [treasury]\nonApprove: release-wire\nonDeny: cancel-wire\nrequireReason: true\n```\n\n```mdma\nid: payment-rail\ntype: webhook\nurl: \"https://payments.example.com/api/v1/wire\"\nmethod: POST\ntrigger: release-wire\nbody:\n  reference: \"wire-transfer\"\n```",
+            "domainId": "wire_transfer_setup",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 39,
+        "vars": {
+          "request": "approval-gate#dual-approval(text=\"Dual approval\")\nwebhook#payment-rail",
+          "expected_mdma": "```mdma\nid: dual-approval\ntype: approval-gate\ntitle: \"Dual approval\"\ndescription: \"Two authorized signers must approve outbound international transfers.\"\nrequiredApprovers: 2\nallowedRoles: [treasury]\nonApprove: release-wire\nonDeny: cancel-wire\nrequireReason: true\n```\n\n```mdma\nid: payment-rail\ntype: webhook\nurl: \"https://payments.example.com/api/v1/wire\"\nmethod: POST\ntrigger: release-wire\nbody:\n  reference: \"wire-transfer\"\n```",
+          "domainId": "wire_transfer_setup",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "821",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:51 GMT",
+              "modal-function-call-id": "fc-01KWC8B22PC5M3VN9MGVFDS172",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "4448bad4-87ae-4445-a293-a78bcd194cd4",
+        "latencyMs": 2659,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#overspend-warning(text=\\\"Przekroczenie budżetu\\\")\\nchart#spend-by-category(variant=pie)\\ntable#transactions\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: overspend-warning\ntype: callout\nvariant: warning\ncontent: Przekroczenie budżetu\n```\n\n```mdma\nid: spend-by-category\ntype: chart\nvariant: pie\nlabel: Wydatki wg kategorii\ndata: |\n  Kategoria,Kwota\n  Jedzenie,1200\n  Transport,450\n  Rozrywka,300\n  Inne,150\n```\n\n```mdma\nid: transactions\ntype: table\ncolumns:\n  - key: date\n    header: Data\n  - key: description\n    header: Opis\n  - key: amount\n    header: Kwota\n  - key: category\n    header: Kategoria\ndata:\n  - date: 2023-10-01\n    description: Zakupy spożywcze\n    amount: 150.50\n    category: Jedzenie\n  - date: 2023-10-02\n    description: Bilet miesięczny\n    amount: 110.00\n    category: Transport\n  - date: 2023-10-05\n    description: Kino\n    amount: 45.00\n    category: Rozrywka\n```",
+          "tokenUsage": {
+            "total": 975,
+            "prompt": 681,
+            "completion": 294,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2659,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1424",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:53 GMT",
+                "modal-function-call-id": "fc-01KWC8B2VRDS7BANS3PQ9NK12X",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_budget_dashboard_pl",
+          "vars": {
+            "request": "callout#overspend-warning(text=\"Przekroczenie budżetu\")\nchart#spend-by-category(variant=pie)\ntable#transactions\n@lang: pl",
+            "expected_mdma": "```mdma\nid: overspend-warning\ntype: callout\nvariant: warning\ntitle: \"Przekroczenie budżetu\"\ncontent: \"Wydatki w kategorii Rozrywka przekroczyły zaplanowany limit o 15%.\"\ndismissible: false\n```\n\n```mdma\nid: spend-by-category\ntype: chart\nvariant: pie\ndata: \"{{budget.by_category}}\"\n```\n\n```mdma\nid: transactions\ntype: table\ncolumns:\n  - { key: merchant, header: \"Sprzedawca\" }\n  - { key: category, header: \"Kategoria\" }\n  - { key: amount, header: \"Kwota\" }\ndata:\n  - { merchant: \"Biedronka\", category: \"Spożywcze\", amount: 142 }\n  - { merchant: \"Netflix\", category: \"Rozrywka\", amount: 43 }\n```",
+            "domainId": "budget_dashboard",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 40,
+        "vars": {
+          "request": "callout#overspend-warning(text=\"Przekroczenie budżetu\")\nchart#spend-by-category(variant=pie)\ntable#transactions\n@lang: pl",
+          "expected_mdma": "```mdma\nid: overspend-warning\ntype: callout\nvariant: warning\ntitle: \"Przekroczenie budżetu\"\ncontent: \"Wydatki w kategorii Rozrywka przekroczyły zaplanowany limit o 15%.\"\ndismissible: false\n```\n\n```mdma\nid: spend-by-category\ntype: chart\nvariant: pie\ndata: \"{{budget.by_category}}\"\n```\n\n```mdma\nid: transactions\ntype: table\ncolumns:\n  - { key: merchant, header: \"Sprzedawca\" }\n  - { key: category, header: \"Kategoria\" }\n  - { key: amount, header: \"Kwota\" }\ndata:\n  - { merchant: \"Biedronka\", category: \"Spożywcze\", amount: 142 }\n  - { merchant: \"Netflix\", category: \"Rozrywka\", amount: 43 }\n```",
+          "domainId": "budget_dashboard",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1424",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:53 GMT",
+              "modal-function-call-id": "fc-01KWC8B2VRDS7BANS3PQ9NK12X",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "e219fdb4-1b4b-4729-b3e4-691466426e83",
+        "latencyMs": 1882,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#timeline-note(text=\\\"What happens next\\\")\\nform#dispute-form[transaction_id*:t, reason*:s{unauthorized|duplicate|not-received}, evidence:f](action=submit-dispute)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: timeline-note\ntype: callout\ncontent: What happens next?\nvariant: info\n```\n\n```mdma\nid: dispute-form\ntype: form\nfields:\n  - name: transaction_id\n    type: text\n    label: Transaction ID\n    required: true\n  - name: reason\n    type: select\n    label: Reason for dispute\n    required: true\n    options:\n      - label: \"Unauthorized\"\n        value: \"unauthorized\"\n      - label: \"Duplicate\"\n        value: \"duplicate\"\n      - label: \"Not received\"\n        value: \"not-received\"\n  - name: evidence\n    type: file\n    label: Evidence Attachment\nonSubmit: submit-dispute\n```",
+          "tokenUsage": {
+            "total": 863,
+            "prompt": 692,
+            "completion": 171,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1882,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1251",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:55 GMT",
+                "modal-function-call-id": "fc-01KWC8B5GG0C0KTV54HRK6Y9WE",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_card_dispute_en",
+          "vars": {
+            "request": "callout#timeline-note(text=\"What happens next\")\nform#dispute-form[transaction_id*:t, reason*:s{unauthorized|duplicate|not-received}, evidence:f](action=submit-dispute)",
+            "expected_mdma": "```mdma\nid: timeline-note\ntype: callout\nvariant: info\ntitle: \"What happens next\"\ncontent: \"Disputes are typically resolved within 10 business days. A provisional credit may be issued while we investigate.\"\ndismissible: false\n```\n\n```mdma\nid: dispute-form\ntype: form\nonSubmit: submit-dispute\nfields:\n  - name: transaction_id\n    type: text\n    label: \"Transaction ID\"\n    required: true\n  - name: reason\n    type: select\n    label: \"Reason\"\n    required: true\n    options:\n      - { label: \"Unauthorized\", value: \"unauthorized\" }\n      - { label: \"Duplicate charge\", value: \"duplicate\" }\n      - { label: \"Item not received\", value: \"not-received\" }\n  - name: evidence\n    type: file\n    label: \"Supporting evidence\"\n```",
+            "domainId": "card_dispute",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 41,
+        "vars": {
+          "request": "callout#timeline-note(text=\"What happens next\")\nform#dispute-form[transaction_id*:t, reason*:s{unauthorized|duplicate|not-received}, evidence:f](action=submit-dispute)",
+          "expected_mdma": "```mdma\nid: timeline-note\ntype: callout\nvariant: info\ntitle: \"What happens next\"\ncontent: \"Disputes are typically resolved within 10 business days. A provisional credit may be issued while we investigate.\"\ndismissible: false\n```\n\n```mdma\nid: dispute-form\ntype: form\nonSubmit: submit-dispute\nfields:\n  - name: transaction_id\n    type: text\n    label: \"Transaction ID\"\n    required: true\n  - name: reason\n    type: select\n    label: \"Reason\"\n    required: true\n    options:\n      - { label: \"Unauthorized\", value: \"unauthorized\" }\n      - { label: \"Duplicate charge\", value: \"duplicate\" }\n      - { label: \"Item not received\", value: \"not-received\" }\n  - name: evidence\n    type: file\n    label: \"Supporting evidence\"\n```",
+          "domainId": "card_dispute",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1251",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:55 GMT",
+              "modal-function-call-id": "fc-01KWC8B5GG0C0KTV54HRK6Y9WE",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "5bbb1509-43d4-4b58-8726-cbfbdd558cc3",
+        "latencyMs": 1492,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#risk-disclosure(text=\\\"Risk disclosure\\\")\\nform#crypto-form[tax_id*^:t, source_of_funds*:s{salary|savings|business}](action=submit-onboarding)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: risk-disclosure\ncontent: This investment carries risk. Please proceed with caution.\nvariant: warning\n```\n\n```mdma\ntype: form\nid: crypto-form\nonSubmit: submit-onboarding\nfields:\n  - name: tax_id\n    type: text\n    label: Tax ID\n    required: true\n    sensitive: true\n  - name: source_of_funds\n    type: select\n    label: Source of Funds\n    required: true\n    options:\n      - label: \"Salary\"\n        value: \"salary\"\n      - label: \"Savings\"\n        value: \"savings\"\n      - label: \"Business\"\n        value: \"business\"\n```",
+          "tokenUsage": {
+            "total": 855,
+            "prompt": 690,
+            "completion": 165,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1492,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1217",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:57 GMT",
+                "modal-function-call-id": "fc-01KWC8B7BC08GMA1HM5670S2DB",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_crypto_onboarding_en",
+          "vars": {
+            "request": "callout#risk-disclosure(text=\"Risk disclosure\")\nform#crypto-form[tax_id*^:t, source_of_funds*:s{salary|savings|business}](action=submit-onboarding)",
+            "expected_mdma": "```mdma\nid: risk-disclosure\ntype: callout\nvariant: warning\ntitle: \"Risk disclosure\"\ncontent: \"Crypto assets are volatile and you may lose your entire investment. Only invest what you can afford to lose.\"\ndismissible: false\n```\n\n```mdma\nid: crypto-form\ntype: form\nonSubmit: submit-onboarding\nfields:\n  - name: tax_id\n    type: text\n    label: \"Tax identification number\"\n    required: true\n    sensitive: true\n  - name: source_of_funds\n    type: select\n    label: \"Source of funds\"\n    required: true\n    options:\n      - { label: \"Salary\", value: \"salary\" }\n      - { label: \"Savings\", value: \"savings\" }\n      - { label: \"Business income\", value: \"business\" }\n```",
+            "domainId": "crypto_onboarding",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 42,
+        "vars": {
+          "request": "callout#risk-disclosure(text=\"Risk disclosure\")\nform#crypto-form[tax_id*^:t, source_of_funds*:s{salary|savings|business}](action=submit-onboarding)",
+          "expected_mdma": "```mdma\nid: risk-disclosure\ntype: callout\nvariant: warning\ntitle: \"Risk disclosure\"\ncontent: \"Crypto assets are volatile and you may lose your entire investment. Only invest what you can afford to lose.\"\ndismissible: false\n```\n\n```mdma\nid: crypto-form\ntype: form\nonSubmit: submit-onboarding\nfields:\n  - name: tax_id\n    type: text\n    label: \"Tax identification number\"\n    required: true\n    sensitive: true\n  - name: source_of_funds\n    type: select\n    label: \"Source of funds\"\n    required: true\n    options:\n      - { label: \"Salary\", value: \"salary\" }\n      - { label: \"Savings\", value: \"savings\" }\n      - { label: \"Business income\", value: \"business\" }\n```",
+          "domainId": "crypto_onboarding",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1217",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:57 GMT",
+              "modal-function-call-id": "fc-01KWC8B7BC08GMA1HM5670S2DB",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 1 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "ad4f62c9-2c38-4cc5-832b-1328c382c5ac",
+        "latencyMs": 2407,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#invoice-form[client_email*^:e, due_date*:d, tax_rate*:s{0|8|23}](action=create-invoice)\\ntable#invoice-items\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: invoice-form\nfields:\n  - name: client_email\n    type: email\n    label: Client Email\n    sensitive: true\n  - name: due_date\n    type: date\n    label: Due Date\n    sensitive: true\n  - name: tax_rate\n    type: select\n    label: Tax Rate\n    options:\n      - label: \"0%\"\n        value: \"0\"\n      - label: \"8%\"\n        value: \"8\"\n      - label: \"23%\"\n        value: \"23\"\nonSubmit: create-invoice\n```\n\n```mdma\ntype: table\nid: invoice-items\ncolumns:\n  - key: description\n    header: Description\n  - key: quantity\n    header: Qty\n  - key: unit_price\n    header: Unit Price\n  - key: amount\n    header: Amount\ndata:\n  - description: Web Design Services\n    quantity: 1\n    unit_price: 1200\n    amount: 1200\n  - description: Hosting (Annual)\n    quantity: 1\n    unit_price: 150\n    amount: 150\n```",
+          "tokenUsage": {
+            "total": 967,
+            "prompt": 687,
+            "completion": 280,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2407,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1497",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:33:59 GMT",
+                "modal-function-call-id": "fc-01KWC8B8TJX04QFR64TD6ADCEW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_invoice_create_en",
+          "vars": {
+            "request": "form#invoice-form[client_email*^:e, due_date*:d, tax_rate*:s{0|8|23}](action=create-invoice)\ntable#invoice-items",
+            "expected_mdma": "```mdma\nid: invoice-form\ntype: form\nonSubmit: create-invoice\nfields:\n  - name: client_email\n    type: email\n    label: \"Client email\"\n    required: true\n    sensitive: true\n  - name: due_date\n    type: date\n    label: \"Due date\"\n    required: true\n  - name: tax_rate\n    type: select\n    label: \"Tax rate\"\n    required: true\n    options:\n      - { label: \"0%\", value: \"0\" }\n      - { label: \"8%\", value: \"8\" }\n      - { label: \"23%\", value: \"23\" }\n```\n\n```mdma\nid: invoice-items\ntype: table\ncolumns:\n  - { key: description, header: \"Description\" }\n  - { key: hours, header: \"Hours\" }\n  - { key: rate, header: \"Rate\" }\ndata:\n  - { description: \"UI design\", hours: 12, rate: 90 }\n  - { description: \"Frontend build\", hours: 20, rate: 95 }\n```",
+            "domainId": "invoice_create",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 43,
+        "vars": {
+          "request": "form#invoice-form[client_email*^:e, due_date*:d, tax_rate*:s{0|8|23}](action=create-invoice)\ntable#invoice-items",
+          "expected_mdma": "```mdma\nid: invoice-form\ntype: form\nonSubmit: create-invoice\nfields:\n  - name: client_email\n    type: email\n    label: \"Client email\"\n    required: true\n    sensitive: true\n  - name: due_date\n    type: date\n    label: \"Due date\"\n    required: true\n  - name: tax_rate\n    type: select\n    label: \"Tax rate\"\n    required: true\n    options:\n      - { label: \"0%\", value: \"0\" }\n      - { label: \"8%\", value: \"8\" }\n      - { label: \"23%\", value: \"23\" }\n```\n\n```mdma\nid: invoice-items\ntype: table\ncolumns:\n  - { key: description, header: \"Description\" }\n  - { key: hours, header: \"Hours\" }\n  - { key: rate, header: \"Rate\" }\ndata:\n  - { description: \"UI design\", hours: 12, rate: 90 }\n  - { description: \"Frontend build\", hours: 20, rate: 95 }\n```",
+          "domainId": "invoice_create",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1497",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:33:59 GMT",
+              "modal-function-call-id": "fc-01KWC8B8TJX04QFR64TD6ADCEW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "9efaef68-f53e-43e7-aafd-e27617242703",
+        "latencyMs": 972,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#onboarding-welcome(text=\\\"Welcome aboard\\\")\\ntasklist#onboarding-tasks(action=complete-onboarding)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: onboarding-welcome\ntype: callout\ncontent: Welcome aboard\nvariant: info\n```\n\n```mdma\nid: onboarding-tasks\ntype: tasklist\nitems:\n  - id: setup-profile\n    text: Complete your user profile\n  - id: security-config\n    text: Enable two-factor authentication\n  - id: invite-team\n    text: Invite your first team member\n```",
+          "tokenUsage": {
+            "total": 766,
+            "prompt": 670,
+            "completion": 96,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 972,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "974",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:01 GMT",
+                "modal-function-call-id": "fc-01KWC8BB6TRHRW34GSM24XDXS4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_employee_onboarding_en",
+          "vars": {
+            "request": "callout#onboarding-welcome(text=\"Welcome aboard\")\ntasklist#onboarding-tasks(action=complete-onboarding)",
+            "expected_mdma": "```mdma\nid: onboarding-welcome\ntype: callout\nvariant: success\ntitle: \"Welcome aboard\"\ncontent: \"Work through each item below to finish your first-day setup.\"\ndismissible: false\n```\n\n```mdma\nid: onboarding-tasks\ntype: tasklist\nonComplete: complete-onboarding\nitems:\n  - id: sign-contract\n    text: \"Sign employment contract\"\n  - id: setup-email\n    text: \"Set up company email\"\n  - id: read-handbook\n    text: \"Read the employee handbook\"\n```",
+            "domainId": "employee_onboarding",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 44,
+        "vars": {
+          "request": "callout#onboarding-welcome(text=\"Welcome aboard\")\ntasklist#onboarding-tasks(action=complete-onboarding)",
+          "expected_mdma": "```mdma\nid: onboarding-welcome\ntype: callout\nvariant: success\ntitle: \"Welcome aboard\"\ncontent: \"Work through each item below to finish your first-day setup.\"\ndismissible: false\n```\n\n```mdma\nid: onboarding-tasks\ntype: tasklist\nonComplete: complete-onboarding\nitems:\n  - id: sign-contract\n    text: \"Sign employment contract\"\n  - id: setup-email\n    text: \"Set up company email\"\n  - id: read-handbook\n    text: \"Read the employee handbook\"\n```",
+          "domainId": "employee_onboarding",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "974",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:01 GMT",
+              "modal-function-call-id": "fc-01KWC8BB6TRHRW34GSM24XDXS4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "a4bbc893-03ef-4015-8f76-b3d3e656466c",
+        "latencyMs": 1650,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#leave-form[leave_type*:s{wypoczynkowy|na-zadanie|bezplatny}, start_date*:d, end_date*:d, note:ta](action=submit-leave)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: leave-form\nonSubmit: submit-leave\nfields:\n  - name: leave_type\n    type: select\n    label: Rodzaj urlopu\n    required: true\n    options:\n      - label: Wypoczynkowy\n        value: wypoczynkowy\n      - label: Na zadanie\n        value: na-zadanie\n      - label: Bezpłatny\n        value: bezplatny\n  - name: start_date\n    type: date\n    label: Data rozpoczęcia\n    required: true\n  - name: end_date\n    type: date\n    label: Data zakończenia\n    required: true\n  - name: note\n    type: textarea\n    label: Uwagi\n```",
+          "tokenUsage": {
+            "total": 868,
+            "prompt": 695,
+            "completion": 173,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1650,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1196",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:02 GMT",
+                "modal-function-call-id": "fc-01KWC8BC7EMYYA5SXC0RYMDWM6",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_leave_request_pl",
+          "vars": {
+            "request": "form#leave-form[leave_type*:s{wypoczynkowy|na-zadanie|bezplatny}, start_date*:d, end_date*:d, note:ta](action=submit-leave)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: leave-form\ntype: form\nonSubmit: submit-leave\nfields:\n  - name: leave_type\n    type: select\n    label: \"Rodzaj urlopu\"\n    required: true\n    options:\n      - { label: \"Wypoczynkowy\", value: \"wypoczynkowy\" }\n      - { label: \"Na żądanie\", value: \"na-zadanie\" }\n      - { label: \"Bezpłatny\", value: \"bezplatny\" }\n  - name: start_date\n    type: date\n    label: \"Data rozpoczęcia\"\n    required: true\n  - name: end_date\n    type: date\n    label: \"Data zakończenia\"\n    required: true\n  - name: note\n    type: textarea\n    label: \"Uzasadnienie\"\n```",
+            "domainId": "leave_request",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 45,
+        "vars": {
+          "request": "form#leave-form[leave_type*:s{wypoczynkowy|na-zadanie|bezplatny}, start_date*:d, end_date*:d, note:ta](action=submit-leave)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: leave-form\ntype: form\nonSubmit: submit-leave\nfields:\n  - name: leave_type\n    type: select\n    label: \"Rodzaj urlopu\"\n    required: true\n    options:\n      - { label: \"Wypoczynkowy\", value: \"wypoczynkowy\" }\n      - { label: \"Na żądanie\", value: \"na-zadanie\" }\n      - { label: \"Bezpłatny\", value: \"bezplatny\" }\n  - name: start_date\n    type: date\n    label: \"Data rozpoczęcia\"\n    required: true\n  - name: end_date\n    type: date\n    label: \"Data zakończenia\"\n    required: true\n  - name: note\n    type: textarea\n    label: \"Uzasadnienie\"\n```",
+          "domainId": "leave_request",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1196",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:02 GMT",
+              "modal-function-call-id": "fc-01KWC8BC7EMYYA5SXC0RYMDWM6",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0641f9a0-dadb-4f6d-a7c4-fa69a26c1c2f",
+        "latencyMs": 2046,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#confidentiality(text=\\\"Confidential\\\")\\nform#review-form[delivery*:s{below|meets|exceeds}, collaboration*:s{below|meets|exceeds}, summary*:ta](action=submit-review)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: confidentiality\ncontent: Confidential\nvariant: info\n```\n\n```mdma\ntype: form\nid: review-form\nonSubmit: submit-review\nfields:\n  - name: delivery\n    type: select\n    label: Delivery Performance\n    required: true\n    options:\n      - label: \"Below\"\n        value: \"below\"\n      - label: \"Meets\"\n        value: \"meets\"\n      - label: \"Exceeds\"\n        value: \"exceeds\"\n  - name: collaboration\n    type: select\n    label: Collaboration\n    required: true\n    options:\n      - label: \"Below\"\n        value: \"below\"\n      - label: \"Meets\"\n        value: \"meets\"\n      - label: \"Exceeds\"\n        value: \"exceeds\"\n  - name: summary\n    type: textarea\n    label: Summary\n    required: true\n```",
+          "tokenUsage": {
+            "total": 914,
+            "prompt": 694,
+            "completion": 220,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2046,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1402",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:04 GMT",
+                "modal-function-call-id": "fc-01KWC8BDTBR72PFZC1QCDQ09DJ",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_performance_review_en",
+          "vars": {
+            "request": "callout#confidentiality(text=\"Confidential\")\nform#review-form[delivery*:s{below|meets|exceeds}, collaboration*:s{below|meets|exceeds}, summary*:ta](action=submit-review)",
+            "expected_mdma": "```mdma\nid: confidentiality\ntype: callout\nvariant: info\ntitle: \"Confidential\"\ncontent: \"Your self-assessment is shared only with your manager and HR.\"\ndismissible: false\n```\n\n```mdma\nid: review-form\ntype: form\nonSubmit: submit-review\nfields:\n  - name: delivery\n    type: select\n    label: \"Delivery\"\n    required: true\n    options:\n      - { label: \"Below\", value: \"below\" }\n      - { label: \"Meets\", value: \"meets\" }\n      - { label: \"Exceeds\", value: \"exceeds\" }\n  - name: collaboration\n    type: select\n    label: \"Collaboration\"\n    required: true\n    options:\n      - { label: \"Below\", value: \"below\" }\n      - { label: \"Meets\", value: \"meets\" }\n      - { label: \"Exceeds\", value: \"exceeds\" }\n  - name: summary\n    type: textarea\n    label: \"Summary\"\n    required: true\n```",
+            "domainId": "performance_review",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 46,
+        "vars": {
+          "request": "callout#confidentiality(text=\"Confidential\")\nform#review-form[delivery*:s{below|meets|exceeds}, collaboration*:s{below|meets|exceeds}, summary*:ta](action=submit-review)",
+          "expected_mdma": "```mdma\nid: confidentiality\ntype: callout\nvariant: info\ntitle: \"Confidential\"\ncontent: \"Your self-assessment is shared only with your manager and HR.\"\ndismissible: false\n```\n\n```mdma\nid: review-form\ntype: form\nonSubmit: submit-review\nfields:\n  - name: delivery\n    type: select\n    label: \"Delivery\"\n    required: true\n    options:\n      - { label: \"Below\", value: \"below\" }\n      - { label: \"Meets\", value: \"meets\" }\n      - { label: \"Exceeds\", value: \"exceeds\" }\n  - name: collaboration\n    type: select\n    label: \"Collaboration\"\n    required: true\n    options:\n      - { label: \"Below\", value: \"below\" }\n      - { label: \"Meets\", value: \"meets\" }\n      - { label: \"Exceeds\", value: \"exceeds\" }\n  - name: summary\n    type: textarea\n    label: \"Summary\"\n    required: true\n```",
+          "domainId": "performance_review",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1402",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:04 GMT",
+              "modal-function-call-id": "fc-01KWC8BDTBR72PFZC1QCDQ09DJ",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "3e8784e2-08ff-4de4-a318-13e41bfe6022",
+        "latencyMs": 1414,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#reimb-form[amount*:n, per_diem*:s{krajowa|zagraniczna}, receipt*:f](action=submit-reimbursement)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: reimb-form\ntype: form\ntitle: Formularz zwrotu kosztów\nonSubmit: submit-reimbursement\nfields:\n  - name: amount\n    type: number\n    label: Kwota do zwrotu\n    required: true\n  - name: per_diem\n    type: select\n    label: Typ diety\n    required: true\n    options:\n      - label: Krajowa\n        value: krajowa\n      - label: Zagraniczna\n        value: zagraniczna\n  - name: receipt\n    type: file\n    label: Załącznik paragonu/faktury\n    required: true\n```",
+          "tokenUsage": {
+            "total": 835,
+            "prompt": 684,
+            "completion": 151,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1414,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1123",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:06 GMT",
+                "modal-function-call-id": "fc-01KWC8BFWYWDVDHC3FADSXYT2B",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_expense_reimbursement_pl",
+          "vars": {
+            "request": "form#reimb-form[amount*:n, per_diem*:s{krajowa|zagraniczna}, receipt*:f](action=submit-reimbursement)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: reimb-form\ntype: form\nonSubmit: submit-reimbursement\nfields:\n  - name: amount\n    type: number\n    label: \"Kwota\"\n    required: true\n  - name: per_diem\n    type: select\n    label: \"Dieta\"\n    required: true\n    options:\n      - { label: \"Krajowa\", value: \"krajowa\" }\n      - { label: \"Zagraniczna\", value: \"zagraniczna\" }\n  - name: receipt\n    type: file\n    label: \"Paragon\"\n    required: true\n```",
+            "domainId": "expense_reimbursement_hr",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 47,
+        "vars": {
+          "request": "form#reimb-form[amount*:n, per_diem*:s{krajowa|zagraniczna}, receipt*:f](action=submit-reimbursement)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: reimb-form\ntype: form\nonSubmit: submit-reimbursement\nfields:\n  - name: amount\n    type: number\n    label: \"Kwota\"\n    required: true\n  - name: per_diem\n    type: select\n    label: \"Dieta\"\n    required: true\n    options:\n      - { label: \"Krajowa\", value: \"krajowa\" }\n      - { label: \"Zagraniczna\", value: \"zagraniczna\" }\n  - name: receipt\n    type: file\n    label: \"Paragon\"\n    required: true\n```",
+          "domainId": "expense_reimbursement_hr",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1123",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:06 GMT",
+              "modal-function-call-id": "fc-01KWC8BFWYWDVDHC3FADSXYT2B",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "28cdc13e-4507-420a-8162-03937cd5eb6c",
+        "latencyMs": 2031,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#freeze-note(text=\\\"Hiring freeze\\\")\\nchart#headcount-by-dept(variant=bar)\\ntable#attrition\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: freeze-note\ntype: callout\ncontent: \"Hiring freeze is currently in effect for all departments.\"\nvariant: warning\n```\n\n```mdma\nid: headcount-by-dept\ntype: chart\nlabel: Headcount by Department\nvariant: bar\ndata: |\n  Department,Count\n  Engineering,45\n  Sales,22\n  Marketing,15\n  Product,12\n  Ops,18\n```\n\n```mdma\nid: attrition\ntype: table\ncolumns:\n  - key: dept\n    header: Department\n  - key: attrition_rate\n    header: Attrition Rate\ndata:\n  - dept: Engineering\n    attrition_rate: \"8%\"\n  - dept: Sales\n    attrition_rate: \"14%\"\n  - dept: Marketing\n    attrition_rate: \"5%\"\n  - dept: Product\n    attrition_rate: \"3%\"\n  - dept: Ops\n    attrition_rate: \"11%\"\n```",
+          "tokenUsage": {
+            "total": 906,
+            "prompt": 673,
+            "completion": 233,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2031,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1351",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:07 GMT",
+                "modal-function-call-id": "fc-01KWC8BH9HSV6JPQ4DMX0GQSTE",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_headcount_dashboard_en",
+          "vars": {
+            "request": "callout#freeze-note(text=\"Hiring freeze\")\nchart#headcount-by-dept(variant=bar)\ntable#attrition",
+            "expected_mdma": "```mdma\nid: freeze-note\ntype: callout\nvariant: warning\ntitle: \"Hiring freeze\"\ncontent: \"A hiring freeze is in effect for non-critical roles through the end of the quarter.\"\ndismissible: false\n```\n\n```mdma\nid: headcount-by-dept\ntype: chart\nvariant: bar\ndata: \"{{hr.headcount_by_department}}\"\nxAxis: department\nyAxis: count\n```\n\n```mdma\nid: attrition\ntype: table\ncolumns:\n  - { key: department, header: \"Department\" }\n  - { key: attrition, header: \"Attrition %\" }\ndata:\n  - { department: \"Engineering\", attrition: \"6%\" }\n  - { department: \"Sales\", attrition: \"11%\" }\n```",
+            "domainId": "headcount_dashboard",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 48,
+        "vars": {
+          "request": "callout#freeze-note(text=\"Hiring freeze\")\nchart#headcount-by-dept(variant=bar)\ntable#attrition",
+          "expected_mdma": "```mdma\nid: freeze-note\ntype: callout\nvariant: warning\ntitle: \"Hiring freeze\"\ncontent: \"A hiring freeze is in effect for non-critical roles through the end of the quarter.\"\ndismissible: false\n```\n\n```mdma\nid: headcount-by-dept\ntype: chart\nvariant: bar\ndata: \"{{hr.headcount_by_department}}\"\nxAxis: department\nyAxis: count\n```\n\n```mdma\nid: attrition\ntype: table\ncolumns:\n  - { key: department, header: \"Department\" }\n  - { key: attrition, header: \"Attrition %\" }\ndata:\n  - { department: \"Engineering\", attrition: \"6%\" }\n  - { department: \"Sales\", attrition: \"11%\" }\n```",
+          "domainId": "headcount_dashboard",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1351",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:07 GMT",
+              "modal-function-call-id": "fc-01KWC8BH9HSV6JPQ4DMX0GQSTE",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "07ffa8af-14cc-4a75-ab34-004ab98cf585",
+        "latencyMs": 1027,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"tasklist#offboarding-tasks(action=complete-offboarding)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: offboarding-tasks\ntype: tasklist\nitems:\n  - id: hardware-return\n    text: Collect laptop, charger, and phone\n  - id: access-revocation\n    text: Revoke access to GitHub and Slack\n  - id: security-audit\n    text: Rotate shared API credentials\n  - id: final-interview\n    text: Conduct exit interview with HR\n```",
+          "tokenUsage": {
+            "total": 749,
+            "prompt": 656,
+            "completion": 93,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1027,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "963",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:09 GMT",
+                "modal-function-call-id": "fc-01KWC8BKAGAWDGDZKHJD0FH7HT",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_offboarding_checklist_en",
+          "vars": {
+            "request": "tasklist#offboarding-tasks(action=complete-offboarding)",
+            "expected_mdma": "```mdma\nid: offboarding-tasks\ntype: tasklist\nonComplete: complete-offboarding\nitems:\n  - id: return-laptop\n    text: \"Return company laptop\"\n  - id: revoke-access\n    text: \"Revoke system access\"\n  - id: exit-interview\n    text: \"Complete exit interview\"\n```",
+            "domainId": "offboarding_checklist",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 49,
+        "vars": {
+          "request": "tasklist#offboarding-tasks(action=complete-offboarding)",
+          "expected_mdma": "```mdma\nid: offboarding-tasks\ntype: tasklist\nonComplete: complete-offboarding\nitems:\n  - id: return-laptop\n    text: \"Return company laptop\"\n  - id: revoke-access\n    text: \"Revoke system access\"\n  - id: exit-interview\n    text: \"Complete exit interview\"\n```",
+          "domainId": "offboarding_checklist",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "963",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:09 GMT",
+              "modal-function-call-id": "fc-01KWC8BKAGAWDGDZKHJD0FH7HT",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0af6617d-12a3-4112-a135-c6153761d269",
+        "latencyMs": 1270,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#fraud-warning(text=\\\"Ostrzeżenie\\\")\\nform#claim-form[policy_number*^:t, incident_date*:d, photos*:f](action=submit-claim)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: fraud-warning\ntype: callout\nvariant: warning\ncontent: Ostrzeżenie\n```\n\n```mdma\nid: claim-form\ntype: form\nfields:\n  - name: policy_number\n    type: text\n    label: Numer polisy\n    required: true\n  - name: incident_date\n    type: date\n    label: Data zdarzenia\n    required: true\n  - name: photos\n    type: file\n    label: Zdjęcia szkody\n    required: true\nonSubmit: submit-claim\n```",
+          "tokenUsage": {
+            "total": 819,
+            "prompt": 691,
+            "completion": 128,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1270,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1050",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:10 GMT",
+                "modal-function-call-id": "fc-01KWC8BMC3M49X0HJN2KKV5W2X",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_insurance_claim_auto_pl",
+          "vars": {
+            "request": "callout#fraud-warning(text=\"Ostrzeżenie\")\nform#claim-form[policy_number*^:t, incident_date*:d, photos*:f](action=submit-claim)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: fraud-warning\ntype: callout\nvariant: error\ntitle: \"Ostrzeżenie\"\ncontent: \"Podanie nieprawdziwych informacji w zgłoszeniu szkody może skutkować odpowiedzialnością karną.\"\ndismissible: false\n```\n\n```mdma\nid: claim-form\ntype: form\nonSubmit: submit-claim\nfields:\n  - name: policy_number\n    type: text\n    label: \"Numer polisy\"\n    required: true\n    sensitive: true\n  - name: incident_date\n    type: date\n    label: \"Data zdarzenia\"\n    required: true\n  - name: photos\n    type: file\n    label: \"Zdjęcia uszkodzeń\"\n    required: true\n```",
+            "domainId": "insurance_claim_auto",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 50,
+        "vars": {
+          "request": "callout#fraud-warning(text=\"Ostrzeżenie\")\nform#claim-form[policy_number*^:t, incident_date*:d, photos*:f](action=submit-claim)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: fraud-warning\ntype: callout\nvariant: error\ntitle: \"Ostrzeżenie\"\ncontent: \"Podanie nieprawdziwych informacji w zgłoszeniu szkody może skutkować odpowiedzialnością karną.\"\ndismissible: false\n```\n\n```mdma\nid: claim-form\ntype: form\nonSubmit: submit-claim\nfields:\n  - name: policy_number\n    type: text\n    label: \"Numer polisy\"\n    required: true\n    sensitive: true\n  - name: incident_date\n    type: date\n    label: \"Data zdarzenia\"\n    required: true\n  - name: photos\n    type: file\n    label: \"Zdjęcia uszkodzeń\"\n    required: true\n```",
+          "domainId": "insurance_claim_auto",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1050",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:10 GMT",
+              "modal-function-call-id": "fc-01KWC8BMC3M49X0HJN2KKV5W2X",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "007bf6f2-3a55-43be-a62c-20d9306ad7af",
+        "latencyMs": 1281,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#policy-form[coverage*:s{basic|standard|premium}, effective_date*:d](action=submit-policy-update)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: policy-form\ntype: form\nfields:\n  - name: coverage\n    type: select\n    label: Coverage Tier\n    required: true\n    options:\n      - label: Basic\n        value: basic\n      - label: Standard\n        value: standard\n      - label: Premium\n        value: premium\n  - name: effective_date\n    type: date\n    label: Effective Date\n    required: true\nonSubmit: submit-policy-update\n```",
+          "tokenUsage": {
+            "total": 781,
+            "prompt": 671,
+            "completion": 110,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1281,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1041",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:12 GMT",
+                "modal-function-call-id": "fc-01KWC8BNJXPVCWNPGJP5YZV6Z5",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_policy_update_en",
+          "vars": {
+            "request": "form#policy-form[coverage*:s{basic|standard|premium}, effective_date*:d](action=submit-policy-update)",
+            "expected_mdma": "```mdma\nid: policy-form\ntype: form\nonSubmit: submit-policy-update\nfields:\n  - name: coverage\n    type: select\n    label: \"Coverage level\"\n    required: true\n    options:\n      - { label: \"Basic\", value: \"basic\" }\n      - { label: \"Standard\", value: \"standard\" }\n      - { label: \"Premium\", value: \"premium\" }\n  - name: effective_date\n    type: date\n    label: \"Effective date\"\n    required: true\n```",
+            "domainId": "policy_update",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 51,
+        "vars": {
+          "request": "form#policy-form[coverage*:s{basic|standard|premium}, effective_date*:d](action=submit-policy-update)",
+          "expected_mdma": "```mdma\nid: policy-form\ntype: form\nonSubmit: submit-policy-update\nfields:\n  - name: coverage\n    type: select\n    label: \"Coverage level\"\n    required: true\n    options:\n      - { label: \"Basic\", value: \"basic\" }\n      - { label: \"Standard\", value: \"standard\" }\n      - { label: \"Premium\", value: \"premium\" }\n  - name: effective_date\n    type: date\n    label: \"Effective date\"\n    required: true\n```",
+          "domainId": "policy_update",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1041",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:12 GMT",
+              "modal-function-call-id": "fc-01KWC8BNJXPVCWNPGJP5YZV6Z5",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "b0a6e5c1-5268-4d7a-a281-63f79ab9b76e",
+        "latencyMs": 1717,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#legal-notice(text=\\\"Informacja prawna\\\")\\nform#beneficiary-form[beneficiary_name*^:t, relationship*:s{malzonek|dziecko|inne}, share_percent*:n](action=submit-beneficiary)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: legal-notice\ntype: callout\nvariant: warning\ncontent: \"Informacja prawna\"\n```\n\n```mdma\nid: beneficiary-form\ntype: form\nfields:\n  - name: beneficiary_name\n    type: text\n    label: Imię i nazwisko beneficjenta\n    required: true\n    sensitive: true\n  - name: relationship\n    type: select\n    label: Stopień pokrewieństwa\n    required: true\n    options:\n      - label: małżonek\n        value: \"małżonek\"\n      - label: dziecko\n        value: \"dziecko\"\n      - label: inne\n        value: \"inne\"\n  - name: share_percent\n    type: number\n    label: Udział procentowy (%)\n    required: true\nonSubmit: submit-beneficiary\n```",
+          "tokenUsage": {
+            "total": 898,
+            "prompt": 703,
+            "completion": 195,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1717,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1307",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:13 GMT",
+                "modal-function-call-id": "fc-01KWC8BPVGYR77GD9ZR36JJX84",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_beneficiary_change_pl",
+          "vars": {
+            "request": "callout#legal-notice(text=\"Informacja prawna\")\nform#beneficiary-form[beneficiary_name*^:t, relationship*:s{malzonek|dziecko|inne}, share_percent*:n](action=submit-beneficiary)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: legal-notice\ntype: callout\nvariant: info\ntitle: \"Informacja prawna\"\ncontent: \"Zmiana uposażonego wchodzi w życie po weryfikacji tożsamości i podpisaniu wniosku.\"\ndismissible: false\n```\n\n```mdma\nid: beneficiary-form\ntype: form\nonSubmit: submit-beneficiary\nfields:\n  - name: beneficiary_name\n    type: text\n    label: \"Imię i nazwisko uposażonego\"\n    required: true\n    sensitive: true\n  - name: relationship\n    type: select\n    label: \"Pokrewieństwo\"\n    required: true\n    options:\n      - { label: \"Małżonek\", value: \"malzonek\" }\n      - { label: \"Dziecko\", value: \"dziecko\" }\n      - { label: \"Inne\", value: \"inne\" }\n  - name: share_percent\n    type: number\n    label: \"Udział (%)\"\n    required: true\n```",
+            "domainId": "beneficiary_change",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 52,
+        "vars": {
+          "request": "callout#legal-notice(text=\"Informacja prawna\")\nform#beneficiary-form[beneficiary_name*^:t, relationship*:s{malzonek|dziecko|inne}, share_percent*:n](action=submit-beneficiary)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: legal-notice\ntype: callout\nvariant: info\ntitle: \"Informacja prawna\"\ncontent: \"Zmiana uposażonego wchodzi w życie po weryfikacji tożsamości i podpisaniu wniosku.\"\ndismissible: false\n```\n\n```mdma\nid: beneficiary-form\ntype: form\nonSubmit: submit-beneficiary\nfields:\n  - name: beneficiary_name\n    type: text\n    label: \"Imię i nazwisko uposażonego\"\n    required: true\n    sensitive: true\n  - name: relationship\n    type: select\n    label: \"Pokrewieństwo\"\n    required: true\n    options:\n      - { label: \"Małżonek\", value: \"malzonek\" }\n      - { label: \"Dziecko\", value: \"dziecko\" }\n      - { label: \"Inne\", value: \"inne\" }\n  - name: share_percent\n    type: number\n    label: \"Udział (%)\"\n    required: true\n```",
+          "domainId": "beneficiary_change",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1307",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:13 GMT",
+              "modal-function-call-id": "fc-01KWC8BPVGYR77GD9ZR36JJX84",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "abfb482b-1af8-4e6a-9f40-6e7679a2f26c",
+        "latencyMs": 1710,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"table#open-claims\\nbutton#assign-next(text=\\\"Assign next claim\\\", action=assign-claim, variant=primary)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: open-claims\ntype: table\ncolumns:\n  - key: claim_id\n    header: Claim ID\n  - key: patient\n    header: Patient\n  - key: type\n    header: Type\n  - key: status\n    header: Status\ndata:\n  - claim_id: C-8821\n    patient: Sarah Jenkins\n    type: Outpatient\n    status: Pending Review\n  - claim_id: C-8822\n    patient: Michael Chen\n    type: Emergency\n    status: Waiting for Documentation\n  - claim_id: C-8825\n    patient: Elena Rodriguez\n    type: Inpatient\n    status: Prior Auth Required\n```\n\n```mdma\nid: assign-claim\ntype: button\ntext: Assign next claim\nonAction: assign-claim-workflow\n```",
+          "tokenUsage": {
+            "total": 863,
+            "prompt": 669,
+            "completion": 194,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1710,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1260",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:15 GMT",
+                "modal-function-call-id": "fc-01KWC8BRJ2H91WACA69WB3K7PB",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_claims_table_en",
+          "vars": {
+            "request": "table#open-claims\nbutton#assign-next(text=\"Assign next claim\", action=assign-claim, variant=primary)",
+            "expected_mdma": "```mdma\nid: open-claims\ntype: table\ncolumns:\n  - { key: claim_id, header: \"Claim\" }\n  - { key: type, header: \"Type\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { claim_id: \"CLM-1042\", type: \"Auto\", status: \"In review\" }\n  - { claim_id: \"CLM-1043\", type: \"Home\", status: \"Pending docs\" }\n```\n\n```mdma\nid: assign-next\ntype: button\ntext: \"Assign next claim\"\nvariant: primary\nonAction: assign-claim\n```",
+            "domainId": "claims_dashboard",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 53,
+        "vars": {
+          "request": "table#open-claims\nbutton#assign-next(text=\"Assign next claim\", action=assign-claim, variant=primary)",
+          "expected_mdma": "```mdma\nid: open-claims\ntype: table\ncolumns:\n  - { key: claim_id, header: \"Claim\" }\n  - { key: type, header: \"Type\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { claim_id: \"CLM-1042\", type: \"Auto\", status: \"In review\" }\n  - { claim_id: \"CLM-1043\", type: \"Home\", status: \"Pending docs\" }\n```\n\n```mdma\nid: assign-next\ntype: button\ntext: \"Assign next claim\"\nvariant: primary\nonAction: assign-claim\n```",
+          "domainId": "claims_dashboard",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1260",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:15 GMT",
+              "modal-function-call-id": "fc-01KWC8BRJ2H91WACA69WB3K7PB",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "2f48aa45-1760-4527-b3c0-b85803f1c6f6",
+        "latencyMs": 696,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#claims-by-status(variant=bar)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: claims-by-status\ntype: chart\nlabel: Claims by Status\nvariant: bar\ndata: |\n  Status,Count\n  Pending,12\n  Approved,45\n  Rejected,8\n  Review,5\n```",
+          "tokenUsage": {
+            "total": 712,
+            "prompt": 653,
+            "completion": 59,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 696,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "795",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:16 GMT",
+                "modal-function-call-id": "fc-01KWC8BT898EN958HVRSWERS0E",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_claims_chart_en",
+          "vars": {
+            "request": "chart#claims-by-status(variant=bar)",
+            "expected_mdma": "```mdma\nid: claims-by-status\ntype: chart\nvariant: bar\ndata: \"{{claims.by_status}}\"\nxAxis: status\nyAxis: count\n```",
+            "domainId": "claims_dashboard",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 54,
+        "vars": {
+          "request": "chart#claims-by-status(variant=bar)",
+          "expected_mdma": "```mdma\nid: claims-by-status\ntype: chart\nvariant: bar\ndata: \"{{claims.by_status}}\"\nxAxis: status\nyAxis: count\n```",
+          "domainId": "claims_dashboard",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "795",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:16 GMT",
+              "modal-function-call-id": "fc-01KWC8BT898EN958HVRSWERS0E",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "371e6824-7e59-4e32-bff2-9784ab8b8f3f",
+        "latencyMs": 1390,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#quote-form[property_type*:s{dom|mieszkanie}, build_year*:n, sum_insured*:n](action=request-quote)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: quote-form\ntype: form\ntitle: Formularz wyceny ubezpieczenia\nonSubmit: request-quote\nfields:\n  - name: property_type\n    type: select\n    label: Typ nieruchomości\n    options:\n      - label: Dom\n        value: dom\n      - label: Mieszkanie\n        value: mieszkanie\n  - name: build_year\n    type: number\n    label: Rok budowy\n  - name: sum_insured\n    type: number\n    label: Suma ubezpieczenia\n```",
+          "tokenUsage": {
+            "total": 809,
+            "prompt": 682,
+            "completion": 127,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1390,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1060",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:17 GMT",
+                "modal-function-call-id": "fc-01KWC8BV05SP9WPTNVBV5M5GY0",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_quote_request_pl",
+          "vars": {
+            "request": "form#quote-form[property_type*:s{dom|mieszkanie}, build_year*:n, sum_insured*:n](action=request-quote)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: quote-form\ntype: form\nonSubmit: request-quote\nfields:\n  - name: property_type\n    type: select\n    label: \"Typ nieruchomości\"\n    required: true\n    options:\n      - { label: \"Dom\", value: \"dom\" }\n      - { label: \"Mieszkanie\", value: \"mieszkanie\" }\n  - name: build_year\n    type: number\n    label: \"Rok budowy\"\n    required: true\n  - name: sum_insured\n    type: number\n    label: \"Suma ubezpieczenia\"\n    required: true\n```",
+            "domainId": "quote_request",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 55,
+        "vars": {
+          "request": "form#quote-form[property_type*:s{dom|mieszkanie}, build_year*:n, sum_insured*:n](action=request-quote)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: quote-form\ntype: form\nonSubmit: request-quote\nfields:\n  - name: property_type\n    type: select\n    label: \"Typ nieruchomości\"\n    required: true\n    options:\n      - { label: \"Dom\", value: \"dom\" }\n      - { label: \"Mieszkanie\", value: \"mieszkanie\" }\n  - name: build_year\n    type: number\n    label: \"Rok budowy\"\n    required: true\n  - name: sum_insured\n    type: number\n    label: \"Suma ubezpieczenia\"\n    required: true\n```",
+          "domainId": "quote_request",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1060",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:17 GMT",
+              "modal-function-call-id": "fc-01KWC8BV05SP9WPTNVBV5M5GY0",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "4f0e382d-8eed-45c3-9187-7b2bef66c8c8",
+        "latencyMs": 1669,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#audit-note(text=\\\"Audit\\\")\\nform#status-form[claim_id*:t, status*:s{approved|denied|more-info}](action=update-status)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: audit-note\ntype: callout\nvariant: info\ncontent: \"Audit\"\n```\n\n```mdma\nid: status-form\ntype: form\nfields:\n  - name: claim_id\n    type: text\n    label: Claim ID\n    required: true\n  - name: status\n    type: select\n    label: Status\n    required: true\n    options:\n      - label: \"Approved\"\n        value: \"approved\"\n      - label: \"Denied\"\n        value: \"denied\"\n      - label: \"More Info\"\n        value: \"more-info\"\nonSubmit: update-status\n```",
+          "tokenUsage": {
+            "total": 835,
+            "prompt": 685,
+            "completion": 150,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1669,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1125",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:18 GMT",
+                "modal-function-call-id": "fc-01KWC8BWAP0HHERK8BFS83DN3J",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_claim_webhook_notify_en",
+          "vars": {
+            "request": "callout#audit-note(text=\"Audit\")\nform#status-form[claim_id*:t, status*:s{approved|denied|more-info}](action=update-status)",
+            "expected_mdma": "```mdma\nid: audit-note\ntype: callout\nvariant: info\ntitle: \"Audit\"\ncontent: \"Every status change is written to the immutable audit log.\"\ndismissible: false\n```\n\n```mdma\nid: status-form\ntype: form\nonSubmit: update-status\nfields:\n  - name: claim_id\n    type: text\n    label: \"Claim ID\"\n    required: true\n  - name: status\n    type: select\n    label: \"New status\"\n    required: true\n    options:\n      - { label: \"Approved\", value: \"approved\" }\n      - { label: \"Denied\", value: \"denied\" }\n      - { label: \"More info needed\", value: \"more-info\" }\n```",
+            "domainId": "claim_webhook_notify",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 56,
+        "vars": {
+          "request": "callout#audit-note(text=\"Audit\")\nform#status-form[claim_id*:t, status*:s{approved|denied|more-info}](action=update-status)",
+          "expected_mdma": "```mdma\nid: audit-note\ntype: callout\nvariant: info\ntitle: \"Audit\"\ncontent: \"Every status change is written to the immutable audit log.\"\ndismissible: false\n```\n\n```mdma\nid: status-form\ntype: form\nonSubmit: update-status\nfields:\n  - name: claim_id\n    type: text\n    label: \"Claim ID\"\n    required: true\n  - name: status\n    type: select\n    label: \"New status\"\n    required: true\n    options:\n      - { label: \"Approved\", value: \"approved\" }\n      - { label: \"Denied\", value: \"denied\" }\n      - { label: \"More info needed\", value: \"more-info\" }\n```",
+          "domainId": "claim_webhook_notify",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1125",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:18 GMT",
+              "modal-function-call-id": "fc-01KWC8BWAP0HHERK8BFS83DN3J",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "2ee5bac4-2473-42c7-9e1e-2dc787ed1de0",
+        "latencyMs": 1310,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#return-form[order_id*:t, reason*:s{damaged|wrong-item|no-longer-needed}, prefer_exchange:c](action=submit-return)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: return-form\nfields:\n  - name: order_id\n    type: text\n    label: Order ID\n    required: true\n  - name: reason\n    type: select\n    label: Return Reason\n    required: true\n    options:\n      - label: Damaged\n        value: damaged\n      - label: Wrong Item\n        value: wrong-item\n      - label: No Longer Needed\n        value: no-longer-needed\n  - name: prefer_exchange\n    type: checkbox\n    label: Prefer Exchange?\nonSubmit: submit-return\n```",
+          "tokenUsage": {
+            "total": 819,
+            "prompt": 681,
+            "completion": 138,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1310,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1122",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:20 GMT",
+                "modal-function-call-id": "fc-01KWC8BXZGJJVD57ZGC624YDZD",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_ecommerce_return_en",
+          "vars": {
+            "request": "form#return-form[order_id*:t, reason*:s{damaged|wrong-item|no-longer-needed}, prefer_exchange:c](action=submit-return)",
+            "expected_mdma": "```mdma\nid: return-form\ntype: form\nonSubmit: submit-return\nfields:\n  - name: order_id\n    type: text\n    label: \"Order number\"\n    required: true\n  - name: reason\n    type: select\n    label: \"Reason for return\"\n    required: true\n    options:\n      - { label: \"Damaged\", value: \"damaged\" }\n      - { label: \"Wrong item\", value: \"wrong-item\" }\n      - { label: \"No longer needed\", value: \"no-longer-needed\" }\n  - name: prefer_exchange\n    type: checkbox\n    label: \"I would prefer an exchange\"\n```",
+            "domainId": "ecommerce_return",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 57,
+        "vars": {
+          "request": "form#return-form[order_id*:t, reason*:s{damaged|wrong-item|no-longer-needed}, prefer_exchange:c](action=submit-return)",
+          "expected_mdma": "```mdma\nid: return-form\ntype: form\nonSubmit: submit-return\nfields:\n  - name: order_id\n    type: text\n    label: \"Order number\"\n    required: true\n  - name: reason\n    type: select\n    label: \"Reason for return\"\n    required: true\n    options:\n      - { label: \"Damaged\", value: \"damaged\" }\n      - { label: \"Wrong item\", value: \"wrong-item\" }\n      - { label: \"No longer needed\", value: \"no-longer-needed\" }\n  - name: prefer_exchange\n    type: checkbox\n    label: \"I would prefer an exchange\"\n```",
+          "domainId": "ecommerce_return",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1122",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:20 GMT",
+              "modal-function-call-id": "fc-01KWC8BXZGJJVD57ZGC624YDZD",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "93a4619a-6118-47d9-9e82-5a8ccf82fc4a",
+        "latencyMs": 1443,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#lead-time(text=\\\"Lead time\\\")\\nform#custom-form[design_file*:f, quantity*:n, material*:s{cotton|polyester}](action=submit-order)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: lead-time\ntype: callout\ncontent: Lead time\nvariant: info\n```\n\n```mdma\nid: custom-form\ntype: form\nonSubmit: submit-order\nfields:\n  - name: design_file\n    type: file\n    label: Design File\n    required: true\n  - name: quantity\n    type: number\n    label: Quantity\n    required: true\n  - name: material\n    type: select\n    label: Material\n    required: true\n    options:\n      - label: Cotton\n        value: \"cotton\"\n      - label: Polyester\n        value: \"polyester\"\n```",
+          "tokenUsage": {
+            "total": 834,
+            "prompt": 686,
+            "completion": 148,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1443,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1146",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:21 GMT",
+                "modal-function-call-id": "fc-01KWC8BZ99H022SZX4ZN5FFG4V",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_custom_order_en",
+          "vars": {
+            "request": "callout#lead-time(text=\"Lead time\")\nform#custom-form[design_file*:f, quantity*:n, material*:s{cotton|polyester}](action=submit-order)",
+            "expected_mdma": "```mdma\nid: lead-time\ntype: callout\nvariant: info\ntitle: \"Lead time\"\ncontent: \"Custom orders ship within 3-4 weeks once the design is approved.\"\ndismissible: true\n```\n\n```mdma\nid: custom-form\ntype: form\nonSubmit: submit-order\nfields:\n  - name: design_file\n    type: file\n    label: \"Design file\"\n    required: true\n  - name: quantity\n    type: number\n    label: \"Quantity\"\n    required: true\n  - name: material\n    type: select\n    label: \"Material\"\n    required: true\n    options:\n      - { label: \"Cotton\", value: \"cotton\" }\n      - { label: \"Polyester\", value: \"polyester\" }\n```",
+            "domainId": "custom_order",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 58,
+        "vars": {
+          "request": "callout#lead-time(text=\"Lead time\")\nform#custom-form[design_file*:f, quantity*:n, material*:s{cotton|polyester}](action=submit-order)",
+          "expected_mdma": "```mdma\nid: lead-time\ntype: callout\nvariant: info\ntitle: \"Lead time\"\ncontent: \"Custom orders ship within 3-4 weeks once the design is approved.\"\ndismissible: true\n```\n\n```mdma\nid: custom-form\ntype: form\nonSubmit: submit-order\nfields:\n  - name: design_file\n    type: file\n    label: \"Design file\"\n    required: true\n  - name: quantity\n    type: number\n    label: \"Quantity\"\n    required: true\n  - name: material\n    type: select\n    label: \"Material\"\n    required: true\n    options:\n      - { label: \"Cotton\", value: \"cotton\" }\n      - { label: \"Polyester\", value: \"polyester\" }\n```",
+          "domainId": "custom_order",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1146",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:21 GMT",
+              "modal-function-call-id": "fc-01KWC8BZ99H022SZX4ZN5FFG4V",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "cf0bd84f-6ecd-4868-89e5-d32d657de021",
+        "latencyMs": 1530,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#review-form[rating*:s{1|3|5}, title*:t, body*:ta, verified:c](action=submit-product-review)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: review-form\ntype: form\nfields:\n  - name: rating\n    type: select\n    label: Ocena\n    required: true\n    options:\n      - label: \"1\"\n        value: \"1\"\n      - label: \"3\"\n        value: \"3\"\n      - label: \"5\"\n        value: \"5\"\n  - name: title\n    type: text\n    label: Tytuł recenzji\n    required: true\n  - name: body\n    type: textarea\n    label: Treść recenzji\n    required: true\n  - name: verified\n    type: checkbox\n    label: Produkt zweryfikowany\nonSubmit: submit-product-review\n```",
+          "tokenUsage": {
+            "total": 851,
+            "prompt": 682,
+            "completion": 169,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1530,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1173",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:23 GMT",
+                "modal-function-call-id": "fc-01KWC8C0QARC61SKVDK1S617SA",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_product_review_pl",
+          "vars": {
+            "request": "form#review-form[rating*:s{1|3|5}, title*:t, body*:ta, verified:c](action=submit-product-review)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: review-form\ntype: form\nonSubmit: submit-product-review\nfields:\n  - name: rating\n    type: select\n    label: \"Ocena\"\n    required: true\n    options:\n      - { label: \"1 gwiazdka\", value: \"1\" }\n      - { label: \"3 gwiazdki\", value: \"3\" }\n      - { label: \"5 gwiazdek\", value: \"5\" }\n  - name: title\n    type: text\n    label: \"Tytuł\"\n    required: true\n  - name: body\n    type: textarea\n    label: \"Treść opinii\"\n    required: true\n  - name: verified\n    type: checkbox\n    label: \"Zakup potwierdzony\"\n```",
+            "domainId": "product_review",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 59,
+        "vars": {
+          "request": "form#review-form[rating*:s{1|3|5}, title*:t, body*:ta, verified:c](action=submit-product-review)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: review-form\ntype: form\nonSubmit: submit-product-review\nfields:\n  - name: rating\n    type: select\n    label: \"Ocena\"\n    required: true\n    options:\n      - { label: \"1 gwiazdka\", value: \"1\" }\n      - { label: \"3 gwiazdki\", value: \"3\" }\n      - { label: \"5 gwiazdek\", value: \"5\" }\n  - name: title\n    type: text\n    label: \"Tytuł\"\n    required: true\n  - name: body\n    type: textarea\n    label: \"Treść opinii\"\n    required: true\n  - name: verified\n    type: checkbox\n    label: \"Zakup potwierdzony\"\n```",
+          "domainId": "product_review",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1173",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:23 GMT",
+              "modal-function-call-id": "fc-01KWC8C0QARC61SKVDK1S617SA",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "de456dca-a5a3-4695-87d3-4db456152a1c",
+        "latencyMs": 1589,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#payout-form[account_holder*:t, iban*^:t, schedule*:s{weekly|monthly}](action=submit-payout)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: payout-form\nfields:\n  - name: account_holder\n    type: text\n    label: Account Holder Name\n    required: true\n    sensitive: true\n  - name: iban\n    type: text\n    label: IBAN\n    required: true\n    sensitive: true\n  - name: schedule\n    type: select\n    label: Payout Schedule\n    required: true\n    options:\n      - label: Weekly\n        value: weekly\n      - label: Monthly\n        value: monthly\nonSubmit: submit-payout\n```\n\n```mdma\ntype: webhook\nid: submit-payout\nurl: https://api.payments.internal/v1/payouts\ntrigger: submit-payout\n```",
+          "tokenUsage": {
+            "total": 849,
+            "prompt": 676,
+            "completion": 173,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1589,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1225",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:24 GMT",
+                "modal-function-call-id": "fc-01KWC8C28MWSXPK75KFDS3ZRZY",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_seller_payout_en",
+          "vars": {
+            "request": "form#payout-form[account_holder*:t, iban*^:t, schedule*:s{weekly|monthly}](action=submit-payout)",
+            "expected_mdma": "```mdma\nid: payout-form\ntype: form\nonSubmit: submit-payout\nfields:\n  - name: account_holder\n    type: text\n    label: \"Account holder\"\n    required: true\n  - name: iban\n    type: text\n    label: \"IBAN\"\n    required: true\n    sensitive: true\n  - name: schedule\n    type: select\n    label: \"Payout schedule\"\n    required: true\n    options:\n      - { label: \"Weekly\", value: \"weekly\" }\n      - { label: \"Monthly\", value: \"monthly\" }\n```",
+            "domainId": "seller_payout",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 60,
+        "vars": {
+          "request": "form#payout-form[account_holder*:t, iban*^:t, schedule*:s{weekly|monthly}](action=submit-payout)",
+          "expected_mdma": "```mdma\nid: payout-form\ntype: form\nonSubmit: submit-payout\nfields:\n  - name: account_holder\n    type: text\n    label: \"Account holder\"\n    required: true\n  - name: iban\n    type: text\n    label: \"IBAN\"\n    required: true\n    sensitive: true\n  - name: schedule\n    type: select\n    label: \"Payout schedule\"\n    required: true\n    options:\n      - { label: \"Weekly\", value: \"weekly\" }\n      - { label: \"Monthly\", value: \"monthly\" }\n```",
+          "domainId": "seller_payout",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1225",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:24 GMT",
+              "modal-function-call-id": "fc-01KWC8C28MWSXPK75KFDS3ZRZY",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "3422e051-1cb7-4560-8b4b-bc5fabe25647",
+        "latencyMs": 1950,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#low-stock(text=\\\"Low stock\\\")\\nchart#revenue-over-time(variant=line)\\ntable#top-products\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: low-stock\ntype: callout\ncontent: \"Low stock\"\nvariant: warning\n```\n\n```mdma\nid: revenue-over-time\ntype: chart\nlabel: Revenue Over Time\nvariant: line\ndata: |\n  Month,Revenue\n  Jan,4500\n  Feb,5200\n  Mar,4800\n  Apr,6100\n```\n\n```mdma\nid: top-products\ntype: table\ncolumns:\n  - key: name\n    header: Product Name\n  - key: sales\n    header: Units Sold\n  - key: stock\n    header: Stock Level\ndata:\n  - name: Widget A\n    sales: 1200\n    stock: 15\n  - name: Gadget B\n    sales: 850\n    stock: 45\n  - name: Tool C\n    sales: 600\n    stock: 8\n```",
+          "tokenUsage": {
+            "total": 894,
+            "prompt": 672,
+            "completion": 222,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1950,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1218",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:26 GMT",
+                "modal-function-call-id": "fc-01KWC8C3TFCF5XQEZSQTY2CGK2",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_sales_dashboard_en",
+          "vars": {
+            "request": "callout#low-stock(text=\"Low stock\")\nchart#revenue-over-time(variant=line)\ntable#top-products",
+            "expected_mdma": "```mdma\nid: low-stock\ntype: callout\nvariant: warning\ntitle: \"Low stock\"\ncontent: \"Three best-selling products are below the reorder threshold.\"\ndismissible: false\n```\n\n```mdma\nid: revenue-over-time\ntype: chart\nvariant: line\ndata: \"{{sales.revenue_daily}}\"\nxAxis: date\nyAxis: revenue\n```\n\n```mdma\nid: top-products\ntype: table\ncolumns:\n  - { key: product, header: \"Product\" }\n  - { key: units, header: \"Units\" }\n  - { key: revenue, header: \"Revenue\" }\ndata:\n  - { product: \"Wireless earbuds\", units: 412, revenue: 28800 }\n  - { product: \"Phone case\", units: 980, revenue: 11760 }\n```",
+            "domainId": "sales_dashboard",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 61,
+        "vars": {
+          "request": "callout#low-stock(text=\"Low stock\")\nchart#revenue-over-time(variant=line)\ntable#top-products",
+          "expected_mdma": "```mdma\nid: low-stock\ntype: callout\nvariant: warning\ntitle: \"Low stock\"\ncontent: \"Three best-selling products are below the reorder threshold.\"\ndismissible: false\n```\n\n```mdma\nid: revenue-over-time\ntype: chart\nvariant: line\ndata: \"{{sales.revenue_daily}}\"\nxAxis: date\nyAxis: revenue\n```\n\n```mdma\nid: top-products\ntype: table\ncolumns:\n  - { key: product, header: \"Product\" }\n  - { key: units, header: \"Units\" }\n  - { key: revenue, header: \"Revenue\" }\ndata:\n  - { product: \"Wireless earbuds\", units: 412, revenue: 28800 }\n  - { product: \"Phone case\", units: 980, revenue: 11760 }\n```",
+          "domainId": "sales_dashboard",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1218",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:26 GMT",
+              "modal-function-call-id": "fc-01KWC8C3TFCF5XQEZSQTY2CGK2",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "89e9c674-6a0f-4ca0-9869-e98b087bc625",
+        "latencyMs": 764,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#fee-note(text=\\\"Opłata\\\")\\napproval-gate#office-review(text=\\\"Decyzja urzędu\\\")\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: fee-note\ntype: callout\ncontent: \"Opłata\"\nvariant: info\n```\n\n```mdma\nid: office-review\ntype: approval-gate\ntitle: \"Decyzja urzędu\"\n```",
+          "tokenUsage": {
+            "total": 735,
+            "prompt": 677,
+            "completion": 58,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 764,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "791",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:28 GMT",
+                "modal-function-call-id": "fc-01KWC8C5R9DK0EWMC4K42HBF4Y",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_permit_application_pl",
+          "vars": {
+            "request": "callout#fee-note(text=\"Opłata\")\napproval-gate#office-review(text=\"Decyzja urzędu\")\n@lang: pl",
+            "expected_mdma": "```mdma\nid: fee-note\ntype: callout\nvariant: info\ntitle: \"Opłata\"\ncontent: \"Opłata skarbowa za wydanie pozwolenia jest naliczana po wstępnej weryfikacji wniosku.\"\ndismissible: false\n```\n\n```mdma\nid: office-review\ntype: approval-gate\ntitle: \"Decyzja urzędu\"\nrequiredApprovers: 1\nallowedRoles: [urzednik]\nonApprove: issue-permit\nonDeny: reject-permit\nrequireReason: false\n```",
+            "domainId": "permit_application",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 62,
+        "vars": {
+          "request": "callout#fee-note(text=\"Opłata\")\napproval-gate#office-review(text=\"Decyzja urzędu\")\n@lang: pl",
+          "expected_mdma": "```mdma\nid: fee-note\ntype: callout\nvariant: info\ntitle: \"Opłata\"\ncontent: \"Opłata skarbowa za wydanie pozwolenia jest naliczana po wstępnej weryfikacji wniosku.\"\ndismissible: false\n```\n\n```mdma\nid: office-review\ntype: approval-gate\ntitle: \"Decyzja urzędu\"\nrequiredApprovers: 1\nallowedRoles: [urzednik]\nonApprove: issue-permit\nonDeny: reject-permit\nrequireReason: false\n```",
+          "domainId": "permit_application",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "791",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:28 GMT",
+              "modal-function-call-id": "fc-01KWC8C5R9DK0EWMC4K42HBF4Y",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "40665d16-ae5c-45de-921e-b596e5281190",
+        "latencyMs": 2245,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#deadline(text=\\\"Filing deadline\\\")\\nform#tax-form[tax_id*^:t, filing_status*:s{single|married-joint}](action=submit-return)\\ntable#income-items\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: deadline-notice\ntype: callout\ncontent: \"Filing deadline is April 15th.\"\nvariant: warning\n```\n\n```mdma\nid: tax-form\ntype: form\nfields:\n  - name: tax_id\n    type: text\n    label: Tax ID\n    required: true\n  - name: filing_status\n    type: select\n    label: Filing Status\n    required: true\n    options:\n      - label: \"Single\"\n        value: \"single\"\n      - label: \"Married Filing Jointly\"\n        value: \"married-joint\"\nonSubmit: submit-return\n```\n\n```mdma\nid: income-items\ntype: table\ncolumns:\n  - key: source\n    header: Source of Income\n  - key: amount\n    header: Amount\ndata:\n  - source: Salary\n    amount: 50000\n  - source: Dividends\n    amount: 1200\n  - source: Freelance\n    amount: 5000\n```",
+          "tokenUsage": {
+            "total": 933,
+            "prompt": 692,
+            "completion": 241,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2245,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1393",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:29 GMT",
+                "modal-function-call-id": "fc-01KWC8C6JCXR5J8VN8X3JTEWEW",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_tax_filing_en",
+          "vars": {
+            "request": "callout#deadline(text=\"Filing deadline\")\nform#tax-form[tax_id*^:t, filing_status*:s{single|married-joint}](action=submit-return)\ntable#income-items",
+            "expected_mdma": "```mdma\nid: deadline\ntype: callout\nvariant: warning\ntitle: \"Filing deadline\"\ncontent: \"Returns submitted after the deadline may incur a late-filing penalty.\"\ndismissible: false\n```\n\n```mdma\nid: tax-form\ntype: form\nonSubmit: submit-return\nfields:\n  - name: tax_id\n    type: text\n    label: \"Tax identification number\"\n    required: true\n    sensitive: true\n  - name: filing_status\n    type: select\n    label: \"Filing status\"\n    required: true\n    options:\n      - { label: \"Single\", value: \"single\" }\n      - { label: \"Married joint\", value: \"married-joint\" }\n```\n\n```mdma\nid: income-items\ntype: table\ncolumns:\n  - { key: source, header: \"Source\" }\n  - { key: amount, header: \"Amount\" }\ndata:\n  - { source: \"Employment\", amount: 62000 }\n  - { source: \"Dividends\", amount: 1400 }\n```",
+            "domainId": "tax_filing",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 63,
+        "vars": {
+          "request": "callout#deadline(text=\"Filing deadline\")\nform#tax-form[tax_id*^:t, filing_status*:s{single|married-joint}](action=submit-return)\ntable#income-items",
+          "expected_mdma": "```mdma\nid: deadline\ntype: callout\nvariant: warning\ntitle: \"Filing deadline\"\ncontent: \"Returns submitted after the deadline may incur a late-filing penalty.\"\ndismissible: false\n```\n\n```mdma\nid: tax-form\ntype: form\nonSubmit: submit-return\nfields:\n  - name: tax_id\n    type: text\n    label: \"Tax identification number\"\n    required: true\n    sensitive: true\n  - name: filing_status\n    type: select\n    label: \"Filing status\"\n    required: true\n    options:\n      - { label: \"Single\", value: \"single\" }\n      - { label: \"Married joint\", value: \"married-joint\" }\n```\n\n```mdma\nid: income-items\ntype: table\ncolumns:\n  - { key: source, header: \"Source\" }\n  - { key: amount, header: \"Amount\" }\ndata:\n  - { source: \"Employment\", amount: 62000 }\n  - { source: \"Dividends\", amount: 1400 }\n```",
+          "domainId": "tax_filing",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1393",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:29 GMT",
+              "modal-function-call-id": "fc-01KWC8C6JCXR5J8VN8X3JTEWEW",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "c53db55b-936d-4eb2-b2ac-9c233dba1983",
+        "latencyMs": 1844,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#service-form[category*:s{droga|oswietlenie|odpady|zielen}, location*:t, photo:f](action=submit-service-request)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: service-form\ntype: form\nlabel: Zgłoszenie usterki / zapytania\nfields:\n  - name: category\n    type: select\n    label: Kategoria zgłoszenia\n    required: true\n    options:\n      - label: Droga\n        value: droga\n      - label: Oświetlenie\n        value: oswietlenie\n      - label: Odpady\n        value: odpady\n      - label: Zieleń\n        value: zielen\n  - name: location\n    type: text\n    label: Lokalizacja\n    required: true\n  - name: photo\n    type: file\n    label: Zdjęcie (opcjonalnie)\nonSubmit: submit-service-request\n```\n\n```mdma\nid: submit-service-request\ntype: webhook\nurl: https://api.example.com/service-request\ntrigger: submit-service-request\n```",
+          "tokenUsage": {
+            "total": 895,
+            "prompt": 685,
+            "completion": 210,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1844,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1341",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:31 GMT",
+                "modal-function-call-id": "fc-01KWC8C8QQ06GZQ3B19GE1QDEY",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_service_request_pl",
+          "vars": {
+            "request": "form#service-form[category*:s{droga|oswietlenie|odpady|zielen}, location*:t, photo:f](action=submit-service-request)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: service-form\ntype: form\nonSubmit: submit-service-request\nfields:\n  - name: category\n    type: select\n    label: \"Kategoria\"\n    required: true\n    options:\n      - { label: \"Droga\", value: \"droga\" }\n      - { label: \"Oświetlenie\", value: \"oswietlenie\" }\n      - { label: \"Odpady\", value: \"odpady\" }\n      - { label: \"Zieleń\", value: \"zielen\" }\n  - name: location\n    type: text\n    label: \"Lokalizacja\"\n    required: true\n  - name: photo\n    type: file\n    label: \"Zdjęcie\"\n```",
+            "domainId": "service_request_311",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 64,
+        "vars": {
+          "request": "form#service-form[category*:s{droga|oswietlenie|odpady|zielen}, location*:t, photo:f](action=submit-service-request)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: service-form\ntype: form\nonSubmit: submit-service-request\nfields:\n  - name: category\n    type: select\n    label: \"Kategoria\"\n    required: true\n    options:\n      - { label: \"Droga\", value: \"droga\" }\n      - { label: \"Oświetlenie\", value: \"oswietlenie\" }\n      - { label: \"Odpady\", value: \"odpady\" }\n      - { label: \"Zieleń\", value: \"zielen\" }\n  - name: location\n    type: text\n    label: \"Lokalizacja\"\n    required: true\n  - name: photo\n    type: file\n    label: \"Zdjęcie\"\n```",
+          "domainId": "service_request_311",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1341",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:31 GMT",
+              "modal-function-call-id": "fc-01KWC8C8QQ06GZQ3B19GE1QDEY",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "dcb4ff82-d63d-4743-8fb6-c19524041a61",
+        "latencyMs": 760,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#eligibility(text=\\\"Eligibility\\\")\\napproval-gate#caseworker-review(text=\\\"Caseworker review\\\")\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: eligibility\ntype: callout\ncontent: Eligibility\nvariant: info\n```\n\n```mdma\nid: caseworker-review\ntype: approval-gate\ntitle: Caseworker review\n```",
+          "tokenUsage": {
+            "total": 717,
+            "prompt": 669,
+            "completion": 48,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 760,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "796",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:33 GMT",
+                "modal-function-call-id": "fc-01KWC8CAJ4ZE0H9554530V9N4N",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_benefits_enrollment_en",
+          "vars": {
+            "request": "callout#eligibility(text=\"Eligibility\")\napproval-gate#caseworker-review(text=\"Caseworker review\")",
+            "expected_mdma": "```mdma\nid: eligibility\ntype: callout\nvariant: info\ntitle: \"Eligibility\"\ncontent: \"Benefits are means-tested. Provide accurate household income to determine eligibility.\"\ndismissible: false\n```\n\n```mdma\nid: caseworker-review\ntype: approval-gate\ntitle: \"Caseworker review\"\nallowedRoles: [caseworker]\nonApprove: approve-benefits\nonDeny: deny-benefits\nrequireReason: true\n```",
+            "domainId": "benefits_enrollment",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 65,
+        "vars": {
+          "request": "callout#eligibility(text=\"Eligibility\")\napproval-gate#caseworker-review(text=\"Caseworker review\")",
+          "expected_mdma": "```mdma\nid: eligibility\ntype: callout\nvariant: info\ntitle: \"Eligibility\"\ncontent: \"Benefits are means-tested. Provide accurate household income to determine eligibility.\"\ndismissible: false\n```\n\n```mdma\nid: caseworker-review\ntype: approval-gate\ntitle: \"Caseworker review\"\nallowedRoles: [caseworker]\nonApprove: approve-benefits\nonDeny: deny-benefits\nrequireReason: true\n```",
+          "domainId": "benefits_enrollment",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "796",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:33 GMT",
+              "modal-function-call-id": "fc-01KWC8CAJ4ZE0H9554530V9N4N",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "ed34c126-968b-4aaa-b6c9-576000d67559",
+        "latencyMs": 1016,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#foia-form[records_description*:ta, date_from:d, date_to:d](action=submit-foia)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: foia-form\nonSubmit: submit-foia\nfields:\n  - name: records_description\n    type: textarea\n    label: Description of Records\n    required: true\n  - name: date_from\n    type: date\n    label: Start Date\n  - name: date_to\n    type: date\n    label: End Date\n```",
+          "tokenUsage": {
+            "total": 763,
+            "prompt": 673,
+            "completion": 90,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1016,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "922",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:34 GMT",
+                "modal-function-call-id": "fc-01KWC8CBBSDNEET9KPGBWNEY2Q",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_foia_request_en",
+          "vars": {
+            "request": "form#foia-form[records_description*:ta, date_from:d, date_to:d](action=submit-foia)",
+            "expected_mdma": "```mdma\nid: foia-form\ntype: form\nonSubmit: submit-foia\nfields:\n  - name: records_description\n    type: textarea\n    label: \"Records requested\"\n    required: true\n  - name: date_from\n    type: date\n    label: \"From date\"\n  - name: date_to\n    type: date\n    label: \"To date\"\n```",
+            "domainId": "foia_request",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 66,
+        "vars": {
+          "request": "form#foia-form[records_description*:ta, date_from:d, date_to:d](action=submit-foia)",
+          "expected_mdma": "```mdma\nid: foia-form\ntype: form\nonSubmit: submit-foia\nfields:\n  - name: records_description\n    type: textarea\n    label: \"Records requested\"\n    required: true\n  - name: date_from\n    type: date\n    label: \"From date\"\n  - name: date_to\n    type: date\n    label: \"To date\"\n```",
+          "domainId": "foia_request",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "922",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:34 GMT",
+              "modal-function-call-id": "fc-01KWC8CBBSDNEET9KPGBWNEY2Q",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "6e67edd6-7378-446c-aedf-a7ae944cb283",
+        "latencyMs": 3357,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#enrollment-form[course*:s{algorytmy|bazy-danych|sieci}, semester*:s{zimowy|letni}](action=submit-enrollment)\\ntable#prerequisites\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: prerequisites-table\ntype: table\ncolumns:\n  - key: subject\n    header: Przedmiot wymagany\n  - key: credit_points\n    header: Punkty ECTS\ndata:\n  - subject: Wstęp do programowania\n    credit_points: 5\n  - subject: Matematyka dyskretna\n    credit_points: 4\n  - subject: Struktury danych\n    credit_points: 5\n```\n\n```mdma\nid: enrollment-form\ntype: form\nonSubmit: submit-enrollment\nfields:\n  - name: course\n    type: select\n    label: Wybierz kurs\n    required: true\n    options:\n      - label: \"Algorytmy\"\n        value: \"algorytmy\"\n      - label: \"Bazy danych\"\n        value: \"bazy-danych\"\n      - label: \"Sieci\"\n        value: \"sieci\"\n  - name: semester\n    type: select\n    label: Semestr\n    required: true\n    options:\n      - label: \"Zimowy\"\n        value: \"zimowy\"\n      - label: \"Letni\"\n        value: \"letni\"\n  - name: full_name\n    type: text\n    label: Imię i nazwisko\n    required: true\n    sensitive: true\n  - name: student_email\n    type: email\n    label: Adres e-mail\n    required: true\n    sensitive: true\n```\n\n```mdma\nid: submit-enrollment\ntype: webhook\nurl: https://api.university.edu/v1/enrollment\ntrigger: submit-enrollment\n```",
+          "tokenUsage": {
+            "total": 1077,
+            "prompt": 693,
+            "completion": 384,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 3357,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1867",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:35 GMT",
+                "modal-function-call-id": "fc-01KWC8CCAR6E150F7PJ9MSDVYK",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_course_enrollment_pl",
+          "vars": {
+            "request": "form#enrollment-form[course*:s{algorytmy|bazy-danych|sieci}, semester*:s{zimowy|letni}](action=submit-enrollment)\ntable#prerequisites\n@lang: pl",
+            "expected_mdma": "```mdma\nid: enrollment-form\ntype: form\nonSubmit: submit-enrollment\nfields:\n  - name: course\n    type: select\n    label: \"Kurs\"\n    required: true\n    options:\n      - { label: \"Algorytmy\", value: \"algorytmy\" }\n      - { label: \"Bazy danych\", value: \"bazy-danych\" }\n      - { label: \"Sieci\", value: \"sieci\" }\n  - name: semester\n    type: select\n    label: \"Semestr\"\n    required: true\n    options:\n      - { label: \"Zimowy\", value: \"zimowy\" }\n      - { label: \"Letni\", value: \"letni\" }\n```\n\n```mdma\nid: prerequisites\ntype: table\ncolumns:\n  - { key: course, header: \"Wymagany kurs\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { course: \"Wstęp do programowania\", status: \"Zaliczony\" }\n  - { course: \"Matematyka dyskretna\", status: \"Zaliczony\" }\n```",
+            "domainId": "course_enrollment",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 67,
+        "vars": {
+          "request": "form#enrollment-form[course*:s{algorytmy|bazy-danych|sieci}, semester*:s{zimowy|letni}](action=submit-enrollment)\ntable#prerequisites\n@lang: pl",
+          "expected_mdma": "```mdma\nid: enrollment-form\ntype: form\nonSubmit: submit-enrollment\nfields:\n  - name: course\n    type: select\n    label: \"Kurs\"\n    required: true\n    options:\n      - { label: \"Algorytmy\", value: \"algorytmy\" }\n      - { label: \"Bazy danych\", value: \"bazy-danych\" }\n      - { label: \"Sieci\", value: \"sieci\" }\n  - name: semester\n    type: select\n    label: \"Semestr\"\n    required: true\n    options:\n      - { label: \"Zimowy\", value: \"zimowy\" }\n      - { label: \"Letni\", value: \"letni\" }\n```\n\n```mdma\nid: prerequisites\ntype: table\ncolumns:\n  - { key: course, header: \"Wymagany kurs\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { course: \"Wstęp do programowania\", status: \"Zaliczony\" }\n  - { course: \"Matematyka dyskretna\", status: \"Zaliczony\" }\n```",
+          "domainId": "course_enrollment",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1867",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:35 GMT",
+              "modal-function-call-id": "fc-01KWC8CCAR6E150F7PJ9MSDVYK",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "d7bffe2a-4090-4898-b7ed-820502609f4b",
+        "latencyMs": 1117,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#feedback-form[overall*:s{poor|average|excellent}, anonymous:c, comments:ta](action=submit-feedback)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: feedback-form\nonSubmit: submit-feedback\nfields:\n  - name: overall\n    type: select\n    label: Overall Satisfaction\n    options:\n      - label: Poor\n        value: poor\n      - label: Average\n        value: average\n      - label: Excellent\n        value: excellent\n  - name: anonymous\n    type: checkbox\n    label: Submit anonymously\n  - name: comments\n    type: textarea\n    label: Additional Comments\n```",
+          "tokenUsage": {
+            "total": 784,
+            "prompt": 671,
+            "completion": 113,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1117,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1079",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:38 GMT",
+                "modal-function-call-id": "fc-01KWC8CFSBA57FKME0DGZVV2R4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_course_feedback_en",
+          "vars": {
+            "request": "form#feedback-form[overall*:s{poor|average|excellent}, anonymous:c, comments:ta](action=submit-feedback)",
+            "expected_mdma": "```mdma\nid: feedback-form\ntype: form\nonSubmit: submit-feedback\nfields:\n  - name: overall\n    type: select\n    label: \"Overall rating\"\n    required: true\n    options:\n      - { label: \"Poor\", value: \"poor\" }\n      - { label: \"Average\", value: \"average\" }\n      - { label: \"Excellent\", value: \"excellent\" }\n  - name: anonymous\n    type: checkbox\n    label: \"Submit anonymously\"\n  - name: comments\n    type: textarea\n    label: \"Comments\"\n```",
+            "domainId": "course_feedback",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 68,
+        "vars": {
+          "request": "form#feedback-form[overall*:s{poor|average|excellent}, anonymous:c, comments:ta](action=submit-feedback)",
+          "expected_mdma": "```mdma\nid: feedback-form\ntype: form\nonSubmit: submit-feedback\nfields:\n  - name: overall\n    type: select\n    label: \"Overall rating\"\n    required: true\n    options:\n      - { label: \"Poor\", value: \"poor\" }\n      - { label: \"Average\", value: \"average\" }\n      - { label: \"Excellent\", value: \"excellent\" }\n  - name: anonymous\n    type: checkbox\n    label: \"Submit anonymously\"\n  - name: comments\n    type: textarea\n    label: \"Comments\"\n```",
+          "domainId": "course_feedback",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1079",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:38 GMT",
+              "modal-function-call-id": "fc-01KWC8CFSBA57FKME0DGZVV2R4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "860e527a-6899-4ed0-8790-b365add6dae1",
+        "latencyMs": 1631,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#scholarship-note(text=\\\"Need-based\\\")\\nform#scholarship-form[transcript*:f, household_income*^:n, essay*:ta](action=submit-scholarship)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: scholarship-note\ntype: callout\nvariant: info\ncontent: Need-based scholarship application. Requires official transcripts and a personal essay.\n```\n\n```mdma\nid: scholarship-form\ntype: form\nonSubmit: submit-scholarship\nfields:\n  - name: transcript\n    type: file\n    label: Official Transcript\n    required: true\n  - name: household_income\n    type: number\n    label: Annual Household Income\n    required: true\n  - name: essay\n    type: textarea\n    label: Personal Essay\n    required: true\n```",
+          "tokenUsage": {
+            "total": 819,
+            "prompt": 686,
+            "completion": 133,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1631,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1157",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:39 GMT",
+                "modal-function-call-id": "fc-01KWC8CGXR6ZM0JV1XGXQ78HZ1",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_scholarship_application_en",
+          "vars": {
+            "request": "callout#scholarship-note(text=\"Need-based\")\nform#scholarship-form[transcript*:f, household_income*^:n, essay*:ta](action=submit-scholarship)",
+            "expected_mdma": "```mdma\nid: scholarship-note\ntype: callout\nvariant: info\ntitle: \"Need-based\"\ncontent: \"Awards are need-based. Financial information is reviewed confidentially by the committee.\"\ndismissible: false\n```\n\n```mdma\nid: scholarship-form\ntype: form\nonSubmit: submit-scholarship\nfields:\n  - name: transcript\n    type: file\n    label: \"Transcript\"\n    required: true\n  - name: household_income\n    type: number\n    label: \"Household income\"\n    required: true\n    sensitive: true\n  - name: essay\n    type: textarea\n    label: \"Personal statement\"\n    required: true\n```",
+            "domainId": "scholarship_application",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 69,
+        "vars": {
+          "request": "callout#scholarship-note(text=\"Need-based\")\nform#scholarship-form[transcript*:f, household_income*^:n, essay*:ta](action=submit-scholarship)",
+          "expected_mdma": "```mdma\nid: scholarship-note\ntype: callout\nvariant: info\ntitle: \"Need-based\"\ncontent: \"Awards are need-based. Financial information is reviewed confidentially by the committee.\"\ndismissible: false\n```\n\n```mdma\nid: scholarship-form\ntype: form\nonSubmit: submit-scholarship\nfields:\n  - name: transcript\n    type: file\n    label: \"Transcript\"\n    required: true\n  - name: household_income\n    type: number\n    label: \"Household income\"\n    required: true\n    sensitive: true\n  - name: essay\n    type: textarea\n    label: \"Personal statement\"\n    required: true\n```",
+          "domainId": "scholarship_application",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1157",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:39 GMT",
+              "modal-function-call-id": "fc-01KWC8CGXR6ZM0JV1XGXQ78HZ1",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "d79c480a-1040-40ea-8cd9-926c5b8a3805",
+        "latencyMs": 1438,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#at-risk(text=\\\"At risk\\\")\\ntable#assignments\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: at-risk\ntype: callout\nvariant: warning\ncontent: At risk\n```\n\n```mdma\nid: assignments\ntype: table\ncolumns:\n  - key: task\n    header: Task\n  - key: owner\n    header: Owner\n  - key: due\n    header: Due Date\ndata:\n  - task: Database Migration\n    owner: Alice\n    due: 2023-12-01\n  - task: API Authentication\n    owner: Bob\n    due: 2023-12-15\n```",
+          "tokenUsage": {
+            "total": 791,
+            "prompt": 657,
+            "completion": 134,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1438,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1009",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:41 GMT",
+                "modal-function-call-id": "fc-01KWC8CJGQYDH3V7J4ACXBVKCN",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_student_progress_table_en",
+          "vars": {
+            "request": "callout#at-risk(text=\"At risk\")\ntable#assignments",
+            "expected_mdma": "```mdma\nid: at-risk\ntype: callout\nvariant: warning\ntitle: \"At risk\"\ncontent: \"This student is below the passing threshold in two courses and may need additional support.\"\ndismissible: false\n```\n\n```mdma\nid: assignments\ntype: table\ncolumns:\n  - { key: assignment, header: \"Assignment\" }\n  - { key: grade, header: \"Grade\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { assignment: \"Essay 1\", grade: \"B\", status: \"Graded\" }\n  - { assignment: \"Midterm\", grade: \"D\", status: \"Graded\" }\n```",
+            "domainId": "student_progress",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 70,
+        "vars": {
+          "request": "callout#at-risk(text=\"At risk\")\ntable#assignments",
+          "expected_mdma": "```mdma\nid: at-risk\ntype: callout\nvariant: warning\ntitle: \"At risk\"\ncontent: \"This student is below the passing threshold in two courses and may need additional support.\"\ndismissible: false\n```\n\n```mdma\nid: assignments\ntype: table\ncolumns:\n  - { key: assignment, header: \"Assignment\" }\n  - { key: grade, header: \"Grade\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { assignment: \"Essay 1\", grade: \"B\", status: \"Graded\" }\n  - { assignment: \"Midterm\", grade: \"D\", status: \"Graded\" }\n```",
+          "domainId": "student_progress",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1009",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:41 GMT",
+              "modal-function-call-id": "fc-01KWC8CJGQYDH3V7J4ACXBVKCN",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "d62d73c5-174c-4dc9-9ca4-3bae897d7b3a",
+        "latencyMs": 789,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#grades-over-term(variant=line)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: grades-over-term\ntype: chart\nlabel: Grades Over Term\nvariant: line\ndata: |\n  Term,Grade\n  Term 1,82\n  Term 2,88\n  Term 3,85\n  Term 4,92\n```",
+          "tokenUsage": {
+            "total": 722,
+            "prompt": 653,
+            "completion": 69,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 789,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "791",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:42 GMT",
+                "modal-function-call-id": "fc-01KWC8CKYGK6HN6DC76E03KKAK",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_student_progress_chart_en",
+          "vars": {
+            "request": "chart#grades-over-term(variant=line)",
+            "expected_mdma": "```mdma\nid: grades-over-term\ntype: chart\nvariant: line\ndata: \"{{student.grades_by_week}}\"\nxAxis: week\nyAxis: grade\n```",
+            "domainId": "student_progress",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 71,
+        "vars": {
+          "request": "chart#grades-over-term(variant=line)",
+          "expected_mdma": "```mdma\nid: grades-over-term\ntype: chart\nvariant: line\ndata: \"{{student.grades_by_week}}\"\nxAxis: week\nyAxis: grade\n```",
+          "domainId": "student_progress",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "791",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:42 GMT",
+              "modal-function-call-id": "fc-01KWC8CKYGK6HN6DC76E03KKAK",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "0dbc6032-b90d-4e0c-b001-90da0d2bd784",
+        "latencyMs": 1621,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#visa-fee(text=\\\"Opłata wizowa\\\")\\nform#visa-form[passport_number*^:t, photo*:f, purpose*:s{turystyka|biznes|studia}](action=submit-visa)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: visa-fee\ntype: callout\ncontent: Opłata wizowa\nvariant: info\n```\n\n```mdma\nid: visa-form\ntype: form\nonSubmit: submit-visa\nfields:\n  - name: passport_number\n    type: text\n    label: Numer paszportu\n    required: true\n  - name: photo\n    type: file\n    label: Zdjęcie\n    required: true\n  - name: purpose\n    type: select\n    label: Cel podróży\n    required: true\n    options:\n      - label: Turystyka\n        value: turystyka\n      - label: Biznes\n        value: biznes\n      - label: Studia\n        value: studia\n```",
+          "tokenUsage": {
+            "total": 872,
+            "prompt": 701,
+            "completion": 171,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1621,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1192",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:43 GMT",
+                "modal-function-call-id": "fc-01KWC8CMRTPEMF3235H8EPZQ82",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_visa_application_pl",
+          "vars": {
+            "request": "callout#visa-fee(text=\"Opłata wizowa\")\nform#visa-form[passport_number*^:t, photo*:f, purpose*:s{turystyka|biznes|studia}](action=submit-visa)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: visa-fee\ntype: callout\nvariant: info\ntitle: \"Opłata wizowa\"\ncontent: \"Opłata wizowa jest bezzwrotna, niezależnie od decyzji konsulatu.\"\ndismissible: false\n```\n\n```mdma\nid: visa-form\ntype: form\nonSubmit: submit-visa\nfields:\n  - name: passport_number\n    type: text\n    label: \"Numer paszportu\"\n    required: true\n    sensitive: true\n  - name: photo\n    type: file\n    label: \"Zdjęcie\"\n    required: true\n  - name: purpose\n    type: select\n    label: \"Cel podróży\"\n    required: true\n    options:\n      - { label: \"Turystyka\", value: \"turystyka\" }\n      - { label: \"Biznes\", value: \"biznes\" }\n      - { label: \"Studia\", value: \"studia\" }\n```",
+            "domainId": "visa_application",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 72,
+        "vars": {
+          "request": "callout#visa-fee(text=\"Opłata wizowa\")\nform#visa-form[passport_number*^:t, photo*:f, purpose*:s{turystyka|biznes|studia}](action=submit-visa)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: visa-fee\ntype: callout\nvariant: info\ntitle: \"Opłata wizowa\"\ncontent: \"Opłata wizowa jest bezzwrotna, niezależnie od decyzji konsulatu.\"\ndismissible: false\n```\n\n```mdma\nid: visa-form\ntype: form\nonSubmit: submit-visa\nfields:\n  - name: passport_number\n    type: text\n    label: \"Numer paszportu\"\n    required: true\n    sensitive: true\n  - name: photo\n    type: file\n    label: \"Zdjęcie\"\n    required: true\n  - name: purpose\n    type: select\n    label: \"Cel podróży\"\n    required: true\n    options:\n      - { label: \"Turystyka\", value: \"turystyka\" }\n      - { label: \"Biznes\", value: \"biznes\" }\n      - { label: \"Studia\", value: \"studia\" }\n```",
+          "domainId": "visa_application",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1192",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:43 GMT",
+              "modal-function-call-id": "fc-01KWC8CMRTPEMF3235H8EPZQ82",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "78b59089-a1d5-4106-9e35-ed04f9d12196",
+        "latencyMs": 1781,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#cancellation(text=\\\"Free cancellation\\\")\\nform#booking-form[check_in*:d, check_out*:d, room_type*:s{standard|deluxe|suite}](action=book-room)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: cancellation-notice\ntype: callout\ncontent: Free cancellation\nvariant: success\n```\n\n```mdma\nid: booking-form\ntype: form\nfields:\n  - name: check_in\n    type: date\n    label: Check-in Date\n    required: true\n  - name: check_out\n    type: date\n    label: Check-out Date\n    required: true\n  - name: room_type\n    type: select\n    label: Room Type\n    required: true\n    options:\n      - label: Standard\n        value: \"standard\"\n      - label: Deluxe\n        value: \"deluxe\"\n      - label: Suite\n        value: \"suite\"\nonSubmit: book-room\n```\n\n```mdma\nid: book-room\ntype: button\ntext: Confirm Booking\nonAction: book-room\n```",
+          "tokenUsage": {
+            "total": 890,
+            "prompt": 691,
+            "completion": 199,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1781,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1306",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:45 GMT",
+                "modal-function-call-id": "fc-01KWC8CPBDW3GR9NCS1E6NJE7J",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_hotel_booking_en",
+          "vars": {
+            "request": "callout#cancellation(text=\"Free cancellation\")\nform#booking-form[check_in*:d, check_out*:d, room_type*:s{standard|deluxe|suite}](action=book-room)",
+            "expected_mdma": "```mdma\nid: cancellation\ntype: callout\nvariant: info\ntitle: \"Free cancellation\"\ncontent: \"Free cancellation up to 48 hours before check-in. Later cancellations are charged one night.\"\ndismissible: true\n```\n\n```mdma\nid: booking-form\ntype: form\nonSubmit: book-room\nfields:\n  - name: check_in\n    type: date\n    label: \"Check-in\"\n    required: true\n  - name: check_out\n    type: date\n    label: \"Check-out\"\n    required: true\n  - name: room_type\n    type: select\n    label: \"Room type\"\n    required: true\n    options:\n      - { label: \"Standard\", value: \"standard\" }\n      - { label: \"Deluxe\", value: \"deluxe\" }\n      - { label: \"Suite\", value: \"suite\" }\n```",
+            "domainId": "hotel_booking",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 73,
+        "vars": {
+          "request": "callout#cancellation(text=\"Free cancellation\")\nform#booking-form[check_in*:d, check_out*:d, room_type*:s{standard|deluxe|suite}](action=book-room)",
+          "expected_mdma": "```mdma\nid: cancellation\ntype: callout\nvariant: info\ntitle: \"Free cancellation\"\ncontent: \"Free cancellation up to 48 hours before check-in. Later cancellations are charged one night.\"\ndismissible: true\n```\n\n```mdma\nid: booking-form\ntype: form\nonSubmit: book-room\nfields:\n  - name: check_in\n    type: date\n    label: \"Check-in\"\n    required: true\n  - name: check_out\n    type: date\n    label: \"Check-out\"\n    required: true\n  - name: room_type\n    type: select\n    label: \"Room type\"\n    required: true\n    options:\n      - { label: \"Standard\", value: \"standard\" }\n      - { label: \"Deluxe\", value: \"deluxe\" }\n      - { label: \"Suite\", value: \"suite\" }\n```",
+          "domainId": "hotel_booking",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1306",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:45 GMT",
+              "modal-function-call-id": "fc-01KWC8CPBDW3GR9NCS1E6NJE7J",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "b75f0495-578b-48b4-abbd-1e5f9e3d396f",
+        "latencyMs": 2084,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"tasklist#itinerary-tasks\\ntable#bookings\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: itinerary-tasks\ntype: tasklist\nitems:\n  - id: hotel-checkin\n    text: Confirm hotel check-in time\n  - id: flight-ready\n    text: Download boarding passes\n  - id: packing-done\n    text: Complete packing checklist\n```\n\n```mdma\nid: bookings\ntype: table\ncolumns:\n  - key: type\n    header: Type\n  - key: details\n    header: Details\n  - key: date\n    header: Date\n  - key: status\n    header: Status\ndata:\n  - type: Flight\n    details: Flight AA123 (JFK -> LAX)\n    date: 2023-11-15\n    status: Confirmed\n  - type: Hotel\n    details: Grand Plaza Hotel\n    date: 2023-11-15\n    status: Confirmed\n  - type: Car Rental\n    details: Hertz SUV\n    date: 2023-11-16\n    status: Pending\n```",
+          "tokenUsage": {
+            "total": 895,
+            "prompt": 652,
+            "completion": 243,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2084,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1355",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:47 GMT",
+                "modal-function-call-id": "fc-01KWC8CR2Y1VEK8JXZCJVTG23P",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_trip_itinerary_en",
+          "vars": {
+            "request": "tasklist#itinerary-tasks\ntable#bookings",
+            "expected_mdma": "```mdma\nid: itinerary-tasks\ntype: tasklist\nitems:\n  - id: day-1\n    text: \"Day 1 — arrival and city walk\"\n  - id: day-2\n    text: \"Day 2 — museum and old town\"\n  - id: day-3\n    text: \"Day 3 — day trip and departure\"\n```\n\n```mdma\nid: bookings\ntype: table\ncolumns:\n  - { key: item, header: \"Booking\" }\n  - { key: date, header: \"Date\" }\n  - { key: ref, header: \"Reference\" }\ndata:\n  - { item: \"Flight\", date: \"2026-07-10\", ref: \"LO123\" }\n  - { item: \"Hotel\", date: \"2026-07-10\", ref: \"BK-9981\" }\n```",
+            "domainId": "trip_itinerary",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 74,
+        "vars": {
+          "request": "tasklist#itinerary-tasks\ntable#bookings",
+          "expected_mdma": "```mdma\nid: itinerary-tasks\ntype: tasklist\nitems:\n  - id: day-1\n    text: \"Day 1 — arrival and city walk\"\n  - id: day-2\n    text: \"Day 2 — museum and old town\"\n  - id: day-3\n    text: \"Day 3 — day trip and departure\"\n```\n\n```mdma\nid: bookings\ntype: table\ncolumns:\n  - { key: item, header: \"Booking\" }\n  - { key: date, header: \"Date\" }\n  - { key: ref, header: \"Reference\" }\ndata:\n  - { item: \"Flight\", date: \"2026-07-10\", ref: \"LO123\" }\n  - { item: \"Hotel\", date: \"2026-07-10\", ref: \"BK-9981\" }\n```",
+          "domainId": "trip_itinerary",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1355",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:47 GMT",
+              "modal-function-call-id": "fc-01KWC8CR2Y1VEK8JXZCJVTG23P",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "5c103bb8-bf24-41e1-9dbe-9e71497a6be4",
+        "latencyMs": 1033,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#fare-difference(text=\\\"Dopłata\\\")\\nform#change-form[booking_ref*:t, new_date*:d](action=submit-change)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: fare-difference\ncontent: Dopłata\nvariant: info\n```\n\n```mdma\ntype: form\nid: change-form\nfields:\n  - name: booking_ref\n    type: text\n    label: Numer rezerwacji\n    required: true\n  - name: new_date\n    type: date\n    label: Nowa data\n    required: true\nonSubmit: submit-change\n```",
+          "tokenUsage": {
+            "total": 788,
+            "prompt": 685,
+            "completion": 103,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1033,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "957",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:49 GMT",
+                "modal-function-call-id": "fc-01KWC8CT52RMG1NB4V1PEBBJAK",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_flight_change_pl",
+          "vars": {
+            "request": "callout#fare-difference(text=\"Dopłata\")\nform#change-form[booking_ref*:t, new_date*:d](action=submit-change)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: fare-difference\ntype: callout\nvariant: warning\ntitle: \"Dopłata\"\ncontent: \"Zmiana terminu może wiązać się z dopłatą do różnicy w cenie biletu.\"\ndismissible: false\n```\n\n```mdma\nid: change-form\ntype: form\nonSubmit: submit-change\nfields:\n  - name: booking_ref\n    type: text\n    label: \"Numer rezerwacji\"\n    required: true\n  - name: new_date\n    type: date\n    label: \"Nowa data\"\n    required: true\n```",
+            "domainId": "flight_change",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 75,
+        "vars": {
+          "request": "callout#fare-difference(text=\"Dopłata\")\nform#change-form[booking_ref*:t, new_date*:d](action=submit-change)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: fare-difference\ntype: callout\nvariant: warning\ntitle: \"Dopłata\"\ncontent: \"Zmiana terminu może wiązać się z dopłatą do różnicy w cenie biletu.\"\ndismissible: false\n```\n\n```mdma\nid: change-form\ntype: form\nonSubmit: submit-change\nfields:\n  - name: booking_ref\n    type: text\n    label: \"Numer rezerwacji\"\n    required: true\n  - name: new_date\n    type: date\n    label: \"Nowa data\"\n    required: true\n```",
+          "domainId": "flight_change",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "957",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:49 GMT",
+              "modal-function-call-id": "fc-01KWC8CT52RMG1NB4V1PEBBJAK",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "ed8b0f1a-fbec-4a18-ae33-54c6736dad8a",
+        "latencyMs": 1494,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#inquiry-form[listing_id*:t, viewing_date:d, phone*^:t](action=submit-inquiry)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: inquiry-form\ntype: form\ntitle: Formularz zapytania\nonSubmit: submit-inquiry\nfields:\n  - name: listing_id\n    type: text\n    label: ID oferty\n    required: true\n  - name: viewing_date\n    type: date\n    label: Data oględzin\n  - name: phone\n    type: text\n    label: Numer telefonu\n    required: true\n    sensitive: true\n```\n\n```mdma\nid: submit-inquiry\ntype: webhook\nurl: https://api.example.com/submit-inquiry\ntrigger: handle-inquiry-submission\n```",
+          "tokenUsage": {
+            "total": 826,
+            "prompt": 678,
+            "completion": 148,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1494,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1115",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:50 GMT",
+                "modal-function-call-id": "fc-01KWC8CV5V2MVYJ23728ZYAP6S",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_property_inquiry_pl",
+          "vars": {
+            "request": "form#inquiry-form[listing_id*:t, viewing_date:d, phone*^:t](action=submit-inquiry)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: inquiry-form\ntype: form\nonSubmit: submit-inquiry\nfields:\n  - name: listing_id\n    type: text\n    label: \"Numer oferty\"\n    required: true\n  - name: viewing_date\n    type: date\n    label: \"Preferowana data oglądania\"\n  - name: phone\n    type: text\n    label: \"Telefon kontaktowy\"\n    required: true\n    sensitive: true\n```",
+            "domainId": "property_inquiry",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 76,
+        "vars": {
+          "request": "form#inquiry-form[listing_id*:t, viewing_date:d, phone*^:t](action=submit-inquiry)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: inquiry-form\ntype: form\nonSubmit: submit-inquiry\nfields:\n  - name: listing_id\n    type: text\n    label: \"Numer oferty\"\n    required: true\n  - name: viewing_date\n    type: date\n    label: \"Preferowana data oglądania\"\n  - name: phone\n    type: text\n    label: \"Telefon kontaktowy\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "property_inquiry",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1115",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:50 GMT",
+              "modal-function-call-id": "fc-01KWC8CV5V2MVYJ23728ZYAP6S",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "d0bf1455-465d-4754-9feb-f474964346a8",
+        "latencyMs": 1228,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#screening-note(text=\\\"Screening\\\")\\nform#tenant-form[full_name*:t, monthly_income*^:n, income_proof*^:f](action=submit-tenant)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: screening-note\ncontent: Screening\nvariant: info\n```\n\n```mdma\ntype: form\nid: tenant-form\nfields:\n  - name: full_name\n    type: text\n    label: Full Name\n    required: true\n    sensitive: true\n  - name: monthly_income\n    type: number\n    label: Monthly Income\n    required: true\n  - name: income_proof\n    type: file\n    label: Income Proof\n    required: true\nonSubmit: submit-tenant\n```",
+          "tokenUsage": {
+            "total": 815,
+            "prompt": 688,
+            "completion": 127,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1228,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1067",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:51 GMT",
+                "modal-function-call-id": "fc-01KWC8CWN6CX9Y2GCZQC8PD25T",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_tenant_application_en",
+          "vars": {
+            "request": "callout#screening-note(text=\"Screening\")\nform#tenant-form[full_name*:t, monthly_income*^:n, income_proof*^:f](action=submit-tenant)",
+            "expected_mdma": "```mdma\nid: screening-note\ntype: callout\nvariant: info\ntitle: \"Screening\"\ncontent: \"Applications include a credit and reference check. Your information is handled confidentially.\"\ndismissible: false\n```\n\n```mdma\nid: tenant-form\ntype: form\nonSubmit: submit-tenant\nfields:\n  - name: full_name\n    type: text\n    label: \"Full name\"\n    required: true\n  - name: monthly_income\n    type: number\n    label: \"Monthly income\"\n    required: true\n    sensitive: true\n  - name: income_proof\n    type: file\n    label: \"Proof of income\"\n    required: true\n    sensitive: true\n```",
+            "domainId": "tenant_application",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 77,
+        "vars": {
+          "request": "callout#screening-note(text=\"Screening\")\nform#tenant-form[full_name*:t, monthly_income*^:n, income_proof*^:f](action=submit-tenant)",
+          "expected_mdma": "```mdma\nid: screening-note\ntype: callout\nvariant: info\ntitle: \"Screening\"\ncontent: \"Applications include a credit and reference check. Your information is handled confidentially.\"\ndismissible: false\n```\n\n```mdma\nid: tenant-form\ntype: form\nonSubmit: submit-tenant\nfields:\n  - name: full_name\n    type: text\n    label: \"Full name\"\n    required: true\n  - name: monthly_income\n    type: number\n    label: \"Monthly income\"\n    required: true\n    sensitive: true\n  - name: income_proof\n    type: file\n    label: \"Proof of income\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "tenant_application",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1067",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:51 GMT",
+              "modal-function-call-id": "fc-01KWC8CWN6CX9Y2GCZQC8PD25T",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "dcfc21ba-eac8-46e2-b854-8afca9e04b9f",
+        "latencyMs": 1381,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#maintenance-form[category*:s{plumbing|electrical|heating}, urgency*:s{low|medium|high}, photo:f](action=submit-maintenance)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: maintenance-form\ntype: form\nonSubmit: submit-maintenance\nfields:\n  - name: category\n    type: select\n    label: Maintenance Category\n    options:\n      - label: Plumbing\n        value: plumbing\n      - label: Electrical\n        value: electrical\n      - label: Heating\n        value: heating\n  - name: urgency\n    type: select\n    label: Urgency Level\n    options:\n      - label: Low\n        value: low\n      - label: Medium\n        value: medium\n      - label: High\n        value: high\n  - name: photo\n    type: file\n    label: Evidence Photo\n```",
+          "tokenUsage": {
+            "total": 829,
+            "prompt": 678,
+            "completion": 151,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1381,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1217",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:52 GMT",
+                "modal-function-call-id": "fc-01KWC8CXWFY1MVCAFXM27GW5HP",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_maintenance_request_en",
+          "vars": {
+            "request": "form#maintenance-form[category*:s{plumbing|electrical|heating}, urgency*:s{low|medium|high}, photo:f](action=submit-maintenance)",
+            "expected_mdma": "```mdma\nid: maintenance-form\ntype: form\nonSubmit: submit-maintenance\nfields:\n  - name: category\n    type: select\n    label: \"Issue category\"\n    required: true\n    options:\n      - { label: \"Plumbing\", value: \"plumbing\" }\n      - { label: \"Electrical\", value: \"electrical\" }\n      - { label: \"Heating\", value: \"heating\" }\n  - name: urgency\n    type: select\n    label: \"Urgency\"\n    required: true\n    options:\n      - { label: \"Low\", value: \"low\" }\n      - { label: \"Medium\", value: \"medium\" }\n      - { label: \"High\", value: \"high\" }\n  - name: photo\n    type: file\n    label: \"Photo of the issue\"\n```",
+            "domainId": "maintenance_request",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 78,
+        "vars": {
+          "request": "form#maintenance-form[category*:s{plumbing|electrical|heating}, urgency*:s{low|medium|high}, photo:f](action=submit-maintenance)",
+          "expected_mdma": "```mdma\nid: maintenance-form\ntype: form\nonSubmit: submit-maintenance\nfields:\n  - name: category\n    type: select\n    label: \"Issue category\"\n    required: true\n    options:\n      - { label: \"Plumbing\", value: \"plumbing\" }\n      - { label: \"Electrical\", value: \"electrical\" }\n      - { label: \"Heating\", value: \"heating\" }\n  - name: urgency\n    type: select\n    label: \"Urgency\"\n    required: true\n    options:\n      - { label: \"Low\", value: \"low\" }\n      - { label: \"Medium\", value: \"medium\" }\n      - { label: \"High\", value: \"high\" }\n  - name: photo\n    type: file\n    label: \"Photo of the issue\"\n```",
+          "domainId": "maintenance_request",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1217",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:52 GMT",
+              "modal-function-call-id": "fc-01KWC8CXWFY1MVCAFXM27GW5HP",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (1 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "6c05efc7-e526-4d5f-be30-16b705536d75",
+        "latencyMs": 1711,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#contract-form[contract_type*:s{msa|sow|dpa}, document*:f, counterparty*:t](action=submit-contract)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: contract-form\ntype: form\nfields:\n  - name: contract_type\n    type: select\n    label: Contract Type\n    required: true\n    options:\n      - label: \"MSA\"\n        value: \"msa\"\n      - label: \"SOW\"\n        value: \"sow\"\n      - label: \"DPA\"\n        value: \"dpa\"\n  - name: document\n    type: file\n    label: Upload Contract\n    required: true\n  - name: counterparty\n    type: text\n    label: Counterparty Name\n    required: true\nonSubmit: submit-contract\n```\n\n```mdma\nid: submit-contract\ntype: webhook\nurl: https://api.legal-ops.example.com/contracts/submit\ntrigger: submit-contract\n```",
+          "tokenUsage": {
+            "total": 866,
+            "prompt": 677,
+            "completion": 189,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1711,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1265",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:54 GMT",
+                "modal-function-call-id": "fc-01KWC8CZ8C3TVECY13RT10SC9X",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_contract_intake_en",
+          "vars": {
+            "request": "form#contract-form[contract_type*:s{msa|sow|dpa}, document*:f, counterparty*:t](action=submit-contract)",
+            "expected_mdma": "```mdma\nid: contract-form\ntype: form\nonSubmit: submit-contract\nfields:\n  - name: contract_type\n    type: select\n    label: \"Contract type\"\n    required: true\n    options:\n      - { label: \"MSA\", value: \"msa\" }\n      - { label: \"SOW\", value: \"sow\" }\n      - { label: \"DPA\", value: \"dpa\" }\n  - name: document\n    type: file\n    label: \"Contract document\"\n    required: true\n  - name: counterparty\n    type: text\n    label: \"Counterparty\"\n    required: true\n```",
+            "domainId": "contract_intake",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 79,
+        "vars": {
+          "request": "form#contract-form[contract_type*:s{msa|sow|dpa}, document*:f, counterparty*:t](action=submit-contract)",
+          "expected_mdma": "```mdma\nid: contract-form\ntype: form\nonSubmit: submit-contract\nfields:\n  - name: contract_type\n    type: select\n    label: \"Contract type\"\n    required: true\n    options:\n      - { label: \"MSA\", value: \"msa\" }\n      - { label: \"SOW\", value: \"sow\" }\n      - { label: \"DPA\", value: \"dpa\" }\n  - name: document\n    type: file\n    label: \"Contract document\"\n    required: true\n  - name: counterparty\n    type: text\n    label: \"Counterparty\"\n    required: true\n```",
+          "domainId": "contract_intake",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1265",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:54 GMT",
+              "modal-function-call-id": "fc-01KWC8CZ8C3TVECY13RT10SC9X",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "67335bdf-6556-42fc-bea5-24796096e264",
+        "latencyMs": 1896,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#nda-disclaimer(text=\\\"Template only\\\")\\nform#nda-form[party_a*:t, party_b*:t, mutual*:s{mutual|one-way}, term_months*:n](action=generate-nda)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: nda-disclaimer\ncontent: \"This is a template only and does not constitute legal advice.\"\nvariant: info\n```\n\n```mdma\ntype: form\nid: nda-form\nfields:\n  - name: party_a\n    type: text\n    label: Legal Name of First Party\n    required: true\n    sensitive: true\n  - name: party_b\n    type: text\n    label: Legal Name of Second Party\n    required: true\n    sensitive: true\n  - name: mutual\n    type: select\n    label: Agreement Type\n    required: true\n    options:\n      - label: \"Mutual\"\n        value: \"mutual\"\n      - label: \"One-Way\"\n        value: \"one-way\"\n  - name: term_months\n    type: number\n    label: Term Duration (Months)\n    required: true\nonSubmit: generate-nda\n```",
+          "tokenUsage": {
+            "total": 911,
+            "prompt": 695,
+            "completion": 216,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1896,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1375",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:56 GMT",
+                "modal-function-call-id": "fc-01KWC8D0YKRRPRWTS1ZZZ1K58S",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_nda_request_en",
+          "vars": {
+            "request": "callout#nda-disclaimer(text=\"Template only\")\nform#nda-form[party_a*:t, party_b*:t, mutual*:s{mutual|one-way}, term_months*:n](action=generate-nda)",
+            "expected_mdma": "```mdma\nid: nda-disclaimer\ntype: callout\nvariant: warning\ntitle: \"Template only\"\ncontent: \"This generates a template NDA and is not a substitute for legal advice.\"\ndismissible: false\n```\n\n```mdma\nid: nda-form\ntype: form\nonSubmit: generate-nda\nfields:\n  - name: party_a\n    type: text\n    label: \"Disclosing party\"\n    required: true\n  - name: party_b\n    type: text\n    label: \"Receiving party\"\n    required: true\n  - name: mutual\n    type: select\n    label: \"Type\"\n    required: true\n    options:\n      - { label: \"Mutual\", value: \"mutual\" }\n      - { label: \"One-way\", value: \"one-way\" }\n  - name: term_months\n    type: number\n    label: \"Term (months)\"\n    required: true\n```",
+            "domainId": "nda_request",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 80,
+        "vars": {
+          "request": "callout#nda-disclaimer(text=\"Template only\")\nform#nda-form[party_a*:t, party_b*:t, mutual*:s{mutual|one-way}, term_months*:n](action=generate-nda)",
+          "expected_mdma": "```mdma\nid: nda-disclaimer\ntype: callout\nvariant: warning\ntitle: \"Template only\"\ncontent: \"This generates a template NDA and is not a substitute for legal advice.\"\ndismissible: false\n```\n\n```mdma\nid: nda-form\ntype: form\nonSubmit: generate-nda\nfields:\n  - name: party_a\n    type: text\n    label: \"Disclosing party\"\n    required: true\n  - name: party_b\n    type: text\n    label: \"Receiving party\"\n    required: true\n  - name: mutual\n    type: select\n    label: \"Type\"\n    required: true\n    options:\n      - { label: \"Mutual\", value: \"mutual\" }\n      - { label: \"One-way\", value: \"one-way\" }\n  - name: term_months\n    type: number\n    label: \"Term (months)\"\n    required: true\n```",
+          "domainId": "nda_request",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1375",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:56 GMT",
+              "modal-function-call-id": "fc-01KWC8D0YKRRPRWTS1ZZZ1K58S",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "311b5a1b-efce-438a-9a49-b7f5b939c82a",
+        "latencyMs": 619,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#privilege(text=\\\"Privileged\\\")\\napproval-gate#conflict-check(text=\\\"Conflict check\\\")\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: privilege\ntype: callout\ncontent: Privileged\nvariant: info\n```\n\n```mdma\nid: conflict-check\ntype: approval-gate\ntitle: Conflict check\n```",
+          "tokenUsage": {
+            "total": 713,
+            "prompt": 666,
+            "completion": 47,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 619,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "787",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:58 GMT",
+                "modal-function-call-id": "fc-01KWC8D2TPGT6HVV38YC4JDBME",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_case_intake_en",
+          "vars": {
+            "request": "callout#privilege(text=\"Privileged\")\napproval-gate#conflict-check(text=\"Conflict check\")",
+            "expected_mdma": "```mdma\nid: privilege\ntype: callout\nvariant: info\ntitle: \"Privileged\"\ncontent: \"Information submitted here may be protected by attorney-client privilege.\"\ndismissible: false\n```\n\n```mdma\nid: conflict-check\ntype: approval-gate\ntitle: \"Conflict check\"\nallowedRoles: [partner]\nonApprove: open-matter\nonDeny: decline-matter\nrequireReason: false\n```",
+            "domainId": "case_intake",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 81,
+        "vars": {
+          "request": "callout#privilege(text=\"Privileged\")\napproval-gate#conflict-check(text=\"Conflict check\")",
+          "expected_mdma": "```mdma\nid: privilege\ntype: callout\nvariant: info\ntitle: \"Privileged\"\ncontent: \"Information submitted here may be protected by attorney-client privilege.\"\ndismissible: false\n```\n\n```mdma\nid: conflict-check\ntype: approval-gate\ntitle: \"Conflict check\"\nallowedRoles: [partner]\nonApprove: open-matter\nonDeny: decline-matter\nrequireReason: false\n```",
+          "domainId": "case_intake",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "787",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:58 GMT",
+              "modal-function-call-id": "fc-01KWC8D2TPGT6HVV38YC4JDBME",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "828a4fac-9c77-46a3-aaaa-33ed9e420491",
+        "latencyMs": 1386,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#conflict-callout(text=\\\"Conflicting requirements\\\")\\nform#min-kyc-form[legal_name*^:t, national_id*^:t](action=submit-min-kyc)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: conflict-callout\ntype: callout\ncontent: \"Conflicting requirements detected in the current configuration. Please address these before proceeding.\"\nvariant: warning\n```\n\n```mdma\nid: min-kyc-form\ntype: form\nonSubmit: submit-min-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: Legal Name\n    sensitive: true\n    required: true\n  - name: national_id\n    type: text\n    label: National ID Number\n    sensitive: true\n    required: true\n```",
+          "tokenUsage": {
+            "total": 821,
+            "prompt": 690,
+            "completion": 131,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1386,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1109",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:34:58 GMT",
+                "modal-function-call-id": "fc-01KWC8D3FY9AN9R20NYC954DK4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_kyc_conflicting_en",
+          "vars": {
+            "request": "callout#conflict-callout(text=\"Conflicting requirements\")\nform#min-kyc-form[legal_name*^:t, national_id*^:t](action=submit-min-kyc)",
+            "expected_mdma": "```mdma\nid: conflict-callout\ntype: callout\nvariant: error\ntitle: \"Conflicting requirements\"\ncontent: \"A verified account cannot be anonymous. Identity details below are required for verification and are never shown publicly.\"\ndismissible: false\n```\n\n```mdma\nid: min-kyc-form\ntype: form\nonSubmit: submit-min-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: \"Legal name\"\n    required: true\n    sensitive: true\n  - name: national_id\n    type: text\n    label: \"National ID\"\n    required: true\n    sensitive: true\n```",
+            "domainId": "kyc_basic",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 82,
+        "vars": {
+          "request": "callout#conflict-callout(text=\"Conflicting requirements\")\nform#min-kyc-form[legal_name*^:t, national_id*^:t](action=submit-min-kyc)",
+          "expected_mdma": "```mdma\nid: conflict-callout\ntype: callout\nvariant: error\ntitle: \"Conflicting requirements\"\ncontent: \"A verified account cannot be anonymous. Identity details below are required for verification and are never shown publicly.\"\ndismissible: false\n```\n\n```mdma\nid: min-kyc-form\ntype: form\nonSubmit: submit-min-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: \"Legal name\"\n    required: true\n    sensitive: true\n  - name: national_id\n    type: text\n    label: \"National ID\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "kyc_basic",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1109",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:34:58 GMT",
+              "modal-function-call-id": "fc-01KWC8D3FY9AN9R20NYC954DK4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "50ea77ee-b522-40e0-8ff4-48bcee003b10",
+        "latencyMs": 903,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#simple-return-form[order_id*:t, reason*:ta](action=start-return)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: form\nid: simple-return-form\nfields:\n  - name: order_id\n    type: text\n    label: Order ID\n    required: true\n  - name: reason\n    type: textarea\n    label: Reason for Return\n    required: true\nonSubmit: start-return\n```",
+          "tokenUsage": {
+            "total": 739,
+            "prompt": 665,
+            "completion": 74,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 903,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "875",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:00 GMT",
+                "modal-function-call-id": "fc-01KWC8D4TBTQYRTYDHSMWNS1H7",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_return_ambiguous_en",
+          "vars": {
+            "request": "form#simple-return-form[order_id*:t, reason*:ta](action=start-return)",
+            "expected_mdma": "```mdma\nid: simple-return-form\ntype: form\nonSubmit: start-return\nfields:\n  - name: order_id\n    type: text\n    label: \"Order number\"\n    required: true\n  - name: reason\n    type: textarea\n    label: \"What would you like to return and why?\"\n    required: true\n```",
+            "domainId": "ecommerce_return",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 83,
+        "vars": {
+          "request": "form#simple-return-form[order_id*:t, reason*:ta](action=start-return)",
+          "expected_mdma": "```mdma\nid: simple-return-form\ntype: form\nonSubmit: start-return\nfields:\n  - name: order_id\n    type: text\n    label: \"Order number\"\n    required: true\n  - name: reason\n    type: textarea\n    label: \"What would you like to return and why?\"\n    required: true\n```",
+          "domainId": "ecommerce_return",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "875",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:00 GMT",
+              "modal-function-call-id": "fc-01KWC8D4TBTQYRTYDHSMWNS1H7",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "2c2b044f-c207-442c-850f-d0e9ad769ff6",
+        "latencyMs": 1051,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#mixed-intake-form[height_cm*:n, weight_kg*:n, last_rtg:d](action=submit-mixed-intake)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: mixed-intake-form\ntype: form\nfields:\n  - name: height_cm\n    type: number\n    label: Wzrost (cm)\n  - name: weight_kg\n    type: number\n    label: Waga (kg)\n  - name: last_rtg\n    type: date\n    label: Ostatnia RTG\nonSubmit: submit-mixed-intake\n```",
+          "tokenUsage": {
+            "total": 782,
+            "prompt": 683,
+            "completion": 99,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1051,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "901",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:00 GMT",
+                "modal-function-call-id": "fc-01KWC8D5RG3E7H3BGBNQT12F4E",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_medical_mixed_lang_pl",
+          "vars": {
+            "request": "form#mixed-intake-form[height_cm*:n, weight_kg*:n, last_rtg:d](action=submit-mixed-intake)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: mixed-intake-form\ntype: form\nonSubmit: submit-mixed-intake\nfields:\n  - name: height_cm\n    type: number\n    label: \"Wzrost (cm)\"\n    required: true\n  - name: weight_kg\n    type: number\n    label: \"Waga (kg)\"\n    required: true\n  - name: last_rtg\n    type: date\n    label: \"Data ostatniego badania RTG\"\n```",
+            "domainId": "medical_intake_clinic",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 84,
+        "vars": {
+          "request": "form#mixed-intake-form[height_cm*:n, weight_kg*:n, last_rtg:d](action=submit-mixed-intake)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: mixed-intake-form\ntype: form\nonSubmit: submit-mixed-intake\nfields:\n  - name: height_cm\n    type: number\n    label: \"Wzrost (cm)\"\n    required: true\n  - name: weight_kg\n    type: number\n    label: \"Waga (kg)\"\n    required: true\n  - name: last_rtg\n    type: date\n    label: \"Data ostatniego badania RTG\"\n```",
+          "domainId": "medical_intake_clinic",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "901",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:00 GMT",
+              "modal-function-call-id": "fc-01KWC8D5RG3E7H3BGBNQT12F4E",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "5c47a139-3c81-4624-b289-9efcbb152175",
+        "latencyMs": 1189,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"form#precheck-form[ssn*^:t, date_of_birth*^:d, annual_income*^:n, bank_account*^:t](action=submit-precheck)\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: precheck-form\ntype: form\nfields:\n  - name: ssn\n    type: text\n    label: SSN\n    sensitive: true\n  - name: date_of_birth\n    type: date\n    label: Date of Birth\n    sensitive: true\n  - name: annual_income\n    type: number\n    label: Annual Income\n  - name: bank_account\n    type: text\n    label: Bank Account Number\n    sensitive: true\nonSubmit: submit-precheck\n```",
+          "tokenUsage": {
+            "total": 809,
+            "prompt": 688,
+            "completion": 121,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1189,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1027",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:02 GMT",
+                "modal-function-call-id": "fc-01KWC8D6S6FRGY95W0WVFA56FX",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_loan_over_constrained_en",
+          "vars": {
+            "request": "form#precheck-form[ssn*^:t, date_of_birth*^:d, annual_income*^:n, bank_account*^:t](action=submit-precheck)",
+            "expected_mdma": "```mdma\nid: precheck-form\ntype: form\nonSubmit: submit-precheck\nfields:\n  - name: ssn\n    type: text\n    label: \"Social security number\"\n    required: true\n    sensitive: true\n  - name: date_of_birth\n    type: date\n    label: \"Date of birth\"\n    required: true\n    sensitive: true\n  - name: annual_income\n    type: number\n    label: \"Annual income\"\n    required: true\n    sensitive: true\n  - name: bank_account\n    type: text\n    label: \"Bank account number\"\n    required: true\n    sensitive: true\n```",
+            "domainId": "loan_application",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 85,
+        "vars": {
+          "request": "form#precheck-form[ssn*^:t, date_of_birth*^:d, annual_income*^:n, bank_account*^:t](action=submit-precheck)",
+          "expected_mdma": "```mdma\nid: precheck-form\ntype: form\nonSubmit: submit-precheck\nfields:\n  - name: ssn\n    type: text\n    label: \"Social security number\"\n    required: true\n    sensitive: true\n  - name: date_of_birth\n    type: date\n    label: \"Date of birth\"\n    required: true\n    sensitive: true\n  - name: annual_income\n    type: number\n    label: \"Annual income\"\n    required: true\n    sensitive: true\n  - name: bank_account\n    type: text\n    label: \"Bank account number\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "loan_application",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1027",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:02 GMT",
+              "modal-function-call-id": "fc-01KWC8D6S6FRGY95W0WVFA56FX",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "044b1f57-ddb5-4fbd-b697-f65aa5bb265a",
+        "latencyMs": 714,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#awaiting-signoff(text=\\\"Awaiting sign-off\\\")\\napproval-gate#partner-signoff(text=\\\"Partner sign-off\\\")\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: awaiting-signoff\ntype: callout\nvariant: info\ncontent: Awaiting sign-off\n```\n\n```mdma\nid: partner-signoff\ntype: approval-gate\ntitle: Partner sign-off\n```",
+          "tokenUsage": {
+            "total": 731,
+            "prompt": 675,
+            "completion": 56,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 714,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "804",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:03 GMT",
+                "modal-function-call-id": "fc-01KWC8D7YM9Z3Q3XF4176QZM7X",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_approval_only_en",
+          "vars": {
+            "request": "callout#awaiting-signoff(text=\"Awaiting sign-off\")\napproval-gate#partner-signoff(text=\"Partner sign-off\")",
+            "expected_mdma": "```mdma\nid: awaiting-signoff\ntype: callout\nvariant: info\ntitle: \"Awaiting sign-off\"\ncontent: \"The contract has been reviewed by legal and is ready for partner sign-off.\"\ndismissible: false\n```\n\n```mdma\nid: partner-signoff\ntype: approval-gate\ntitle: \"Partner sign-off\"\nrequiredApprovers: 1\nallowedRoles: [partner]\nonApprove: approve-contract\nonDeny: reject-contract\nrequireReason: false\n```",
+            "domainId": "contract_intake",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 86,
+        "vars": {
+          "request": "callout#awaiting-signoff(text=\"Awaiting sign-off\")\napproval-gate#partner-signoff(text=\"Partner sign-off\")",
+          "expected_mdma": "```mdma\nid: awaiting-signoff\ntype: callout\nvariant: info\ntitle: \"Awaiting sign-off\"\ncontent: \"The contract has been reviewed by legal and is ready for partner sign-off.\"\ndismissible: false\n```\n\n```mdma\nid: partner-signoff\ntype: approval-gate\ntitle: \"Partner sign-off\"\nrequiredApprovers: 1\nallowedRoles: [partner]\nonApprove: approve-contract\nonDeny: reject-contract\nrequireReason: false\n```",
+          "domainId": "contract_intake",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "804",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:03 GMT",
+              "modal-function-call-id": "fc-01KWC8D7YM9Z3Q3XF4176QZM7X",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "8a3b365f-bc2c-4bb3-9950-9ae858dc631b",
+        "latencyMs": 1479,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"chart#cashflow(variant=area)\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: cashflow-analysis\ntype: chart\nlabel: Przepływy pieniężne\nvariant: area\ndata: |\n  Miesiąc,Wpływy,Wydatki\n  Styczeń,5000,3000\n  Luty,5500,3200\n  Marzec,4800,4000\n  Kwiecień,6000,3500\n  Maj,6200,3800\n  Czerwiec,5900,4100\n```",
+          "tokenUsage": {
+            "total": 793,
+            "prompt": 655,
+            "completion": 138,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 1479,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "883",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:03 GMT",
+                "modal-function-call-id": "fc-01KWC8D8PJD8FQSB2YWBSFGWP4",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_chart_only_pl",
+          "vars": {
+            "request": "chart#cashflow(variant=area)\n@lang: pl",
+            "expected_mdma": "```mdma\nid: cashflow\ntype: chart\nvariant: area\ndata: \"{{finance.cashflow}}\"\nxAxis: month\nyAxis: net\n```",
+            "domainId": "budget_dashboard",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 87,
+        "vars": {
+          "request": "chart#cashflow(variant=area)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: cashflow\ntype: chart\nvariant: area\ndata: \"{{finance.cashflow}}\"\nxAxis: month\nyAxis: net\n```",
+          "domainId": "budget_dashboard",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "883",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:03 GMT",
+              "modal-function-call-id": "fc-01KWC8D8PJD8FQSB2YWBSFGWP4",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "2eba29be-f877-4622-859b-b615350345a9",
+        "latencyMs": 2845,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"table#cashflow-table\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: cashflow-table\ntype: table\ncolumns:\n  - key: date\n    header: Data\n  - key: category\n    header: Kategoria\n  - key: description\n    header: Opis\n  - key: amount\n    header: Kwota\n  - key: type\n    header: Typ\ndata:\n  - date: \"2023-10-01\"\n    category: \"Przychody\"\n    description: \"Sprzedaż usług\"\n    amount: 5000\n    type: \"Wpływ\"\n  - date: \"2023-10-05\"\n    category: \"Koszty\"\n    description: \"Czynsz biurowy\"\n    amount: -2000\n    type: \"Wydatki\"\n  - date: \"2023-10-10\"\n    category: \"Koszty\"\n    description: \"Subskrypcja oprogramowania\"\n    amount: -150\n    type: \"Wydatki\"\n  - date: \"2023-10-15\"\n    category: \"Przychody\"\n    description: \"Zwrot podatku\"\n    amount: 500\n    type: \"Wpływ\"\n```",
+          "tokenUsage": {
+            "total": 955,
+            "prompt": 652,
+            "completion": 303,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 2845,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "1412",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:05 GMT",
+                "modal-function-call-id": "fc-01KWC8DA46158HT40CGH8SGQQ0",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_adv_table_only_pl",
+          "vars": {
+            "request": "table#cashflow-table\n@lang: pl",
+            "expected_mdma": "```mdma\nid: cashflow-table\ntype: table\ncolumns:\n  - { key: month, header: \"Miesiąc\" }\n  - { key: inflow, header: \"Wpływy\" }\n  - { key: outflow, header: \"Wydatki\" }\ndata:\n  - { month: \"Styczeń\", inflow: 18000, outflow: 14200 }\n  - { month: \"Luty\", inflow: 17500, outflow: 15100 }\n```",
+            "domainId": "budget_dashboard",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 88,
+        "vars": {
+          "request": "table#cashflow-table\n@lang: pl",
+          "expected_mdma": "```mdma\nid: cashflow-table\ntype: table\ncolumns:\n  - { key: month, header: \"Miesiąc\" }\n  - { key: inflow, header: \"Wpływy\" }\n  - { key: outflow, header: \"Wydatki\" }\ndata:\n  - { month: \"Styczeń\", inflow: 18000, outflow: 14200 }\n  - { month: \"Luty\", inflow: 17500, outflow: 15100 }\n```",
+          "domainId": "budget_dashboard",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "1412",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:05 GMT",
+              "modal-function-call-id": "fc-01KWC8DA46158HT40CGH8SGQQ0",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "514c422c-4111-4068-a2b5-418280a92bcf",
+        "latencyMs": 458,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#request-received(text=\\\"Request received\\\")\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: request-received\ntype: callout\ncontent: Request received\nvariant: info\n```",
+          "tokenUsage": {
+            "total": 679,
+            "prompt": 653,
+            "completion": 26,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 458,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "720",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:08 GMT",
+                "modal-function-call-id": "fc-01KWC8DCZ8EN3AW73AXXKQ6QS2",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_foia_received_callout_en",
+          "vars": {
+            "request": "callout#request-received(text=\"Request received\")",
+            "expected_mdma": "```mdma\nid: request-received\ntype: callout\nvariant: success\ntitle: \"Request received\"\ncontent: \"Your records request was received. We will respond within 20 business days.\"\ndismissible: true\n```",
+            "domainId": "foia_request",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 89,
+        "vars": {
+          "request": "callout#request-received(text=\"Request received\")",
+          "expected_mdma": "```mdma\nid: request-received\ntype: callout\nvariant: success\ntitle: \"Request received\"\ncontent: \"Your records request was received. We will respond within 20 business days.\"\ndismissible: true\n```",
+          "domainId": "foia_request",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "720",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:08 GMT",
+              "modal-function-call-id": "fc-01KWC8DCZ8EN3AW73AXXKQ6QS2",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "a729b307-40f2-4352-b09c-1684e1da13db",
+        "latencyMs": 632,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#outage-notice(text=\\\"Przerwa w działaniu\\\")\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: outage-notice\ntype: callout\nvariant: warning\ncontent: Przerwa w działaniu\n```",
+          "tokenUsage": {
+            "total": 693,
+            "prompt": 663,
+            "completion": 30,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 632,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "724",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:08 GMT",
+                "modal-function-call-id": "fc-01KWC8DDFJQEF03PPBYXDN58G0",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_service_outage_callout_pl",
+          "vars": {
+            "request": "callout#outage-notice(text=\"Przerwa w działaniu\")\n@lang: pl",
+            "expected_mdma": "```mdma\nid: outage-notice\ntype: callout\nvariant: warning\ntitle: \"Przerwa w działaniu\"\ncontent: \"System zgłoszeń jest tymczasowo niedostępny z powodu prac serwisowych. Spróbuj ponownie później.\"\ndismissible: false\n```",
+            "domainId": "service_request_311",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 90,
+        "vars": {
+          "request": "callout#outage-notice(text=\"Przerwa w działaniu\")\n@lang: pl",
+          "expected_mdma": "```mdma\nid: outage-notice\ntype: callout\nvariant: warning\ntitle: \"Przerwa w działaniu\"\ncontent: \"System zgłoszeń jest tymczasowo niedostępny z powodu prac serwisowych. Spróbuj ponownie później.\"\ndismissible: false\n```",
+          "domainId": "service_request_311",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "724",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:08 GMT",
+              "modal-function-call-id": "fc-01KWC8DDFJQEF03PPBYXDN58G0",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "3a5c4079-82dd-422e-a5cb-e4f155fd2da9",
+        "latencyMs": 525,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#credit-notice(text=\\\"Provisional credit applied\\\")\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: credit-notice\ntype: callout\ncontent: Provisional credit applied\nvariant: success\n```",
+          "tokenUsage": {
+            "total": 682,
+            "prompt": 655,
+            "completion": 27,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 525,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "730",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:09 GMT",
+                "modal-function-call-id": "fc-01KWC8DE2AXYDJP50XAXS17BDP",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_dispute_credit_callout_en",
+          "vars": {
+            "request": "callout#credit-notice(text=\"Provisional credit applied\")",
+            "expected_mdma": "```mdma\nid: credit-notice\ntype: callout\nvariant: info\ntitle: \"Provisional credit applied\"\ncontent: \"A provisional credit has been applied to your account while we investigate the dispute.\"\ndismissible: true\n```",
+            "domainId": "card_dispute",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 91,
+        "vars": {
+          "request": "callout#credit-notice(text=\"Provisional credit applied\")",
+          "expected_mdma": "```mdma\nid: credit-notice\ntype: callout\nvariant: info\ntitle: \"Provisional credit applied\"\ncontent: \"A provisional credit has been applied to your account while we investigate the dispute.\"\ndismissible: true\n```",
+          "domainId": "card_dispute",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "730",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:09 GMT",
+              "modal-function-call-id": "fc-01KWC8DE2AXYDJP50XAXS17BDP",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "c11ad64e-3605-4f6f-8e84-11a6e1d259b2",
+        "latencyMs": 916,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#payout-activating(text=\\\"Activating payouts\\\")\\nwebhook#activate-payout\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: payout-activating\ncontent: \"Activating payouts\"\nvariant: info\n```\n\n```mdma\ntype: webhook\nid: activate-payout\nurl: \"https://api.provider.com/v1/payouts/activate\"\ntrigger: payout-activated\n```",
+          "tokenUsage": {
+            "total": 735,
+            "prompt": 662,
+            "completion": 73,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 916,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "861",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:09 GMT",
+                "modal-function-call-id": "fc-01KWC8DEKA0B4JR8HTC92GT80S",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_seller_payout_activate_en",
+          "vars": {
+            "request": "callout#payout-activating(text=\"Activating payouts\")\nwebhook#activate-payout",
+            "expected_mdma": "```mdma\nid: payout-activating\ntype: callout\nvariant: info\ntitle: \"Activating payouts\"\ncontent: \"Your payout details were saved — activating payouts now.\"\ndismissible: false\n```\n\n```mdma\nid: activate-payout\ntype: webhook\nurl: \"https://payouts.example.com/api/activate\"\nmethod: POST\ntrigger: submit-payout\n```",
+            "domainId": "seller_payout",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 92,
+        "vars": {
+          "request": "callout#payout-activating(text=\"Activating payouts\")\nwebhook#activate-payout",
+          "expected_mdma": "```mdma\nid: payout-activating\ntype: callout\nvariant: info\ntitle: \"Activating payouts\"\ncontent: \"Your payout details were saved — activating payouts now.\"\ndismissible: false\n```\n\n```mdma\nid: activate-payout\ntype: webhook\nurl: \"https://payouts.example.com/api/activate\"\nmethod: POST\ntrigger: submit-payout\n```",
+          "domainId": "seller_payout",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "861",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:09 GMT",
+              "modal-function-call-id": "fc-01KWC8DEKA0B4JR8HTC92GT80S",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "f47346cf-f8e7-460e-afa1-d590a8b11f3b",
+        "latencyMs": 775,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#claimant-notifying(text=\\\"Notifying claimant\\\")\\nwebhook#notify-claimant\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\ntype: callout\nid: claimant-notifying\ncontent: Notifying claimant\nvariant: info\n```\n\n```mdma\ntype: webhook\nid: notify-claimant\nurl: https://api.service.internal/notify\ntrigger: notify-claimant-webhook\n```",
+          "tokenUsage": {
+            "total": 729,
+            "prompt": 663,
+            "completion": 66,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 775,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "852",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:10 GMT",
+                "modal-function-call-id": "fc-01KWC8DFGEB98WRZ574ZMM44PN",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_claim_notify_webhook_en",
+          "vars": {
+            "request": "callout#claimant-notifying(text=\"Notifying claimant\")\nwebhook#notify-claimant",
+            "expected_mdma": "```mdma\nid: claimant-notifying\ntype: callout\nvariant: info\ntitle: \"Notifying claimant\"\ncontent: \"Sending the status update to the claimant notification service.\"\ndismissible: false\n```\n\n```mdma\nid: notify-claimant\ntype: webhook\nurl: \"https://notify.example.com/api/claims/status\"\nmethod: POST\ntrigger: update-status\nbody:\n  event: \"status-change\"\n```",
+            "domainId": "claim_webhook_notify",
+            "language": "en"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 93,
+        "vars": {
+          "request": "callout#claimant-notifying(text=\"Notifying claimant\")\nwebhook#notify-claimant",
+          "expected_mdma": "```mdma\nid: claimant-notifying\ntype: callout\nvariant: info\ntitle: \"Notifying claimant\"\ncontent: \"Sending the status update to the claimant notification service.\"\ndismissible: false\n```\n\n```mdma\nid: notify-claimant\ntype: webhook\nurl: \"https://notify.example.com/api/claims/status\"\nmethod: POST\ntrigger: update-status\nbody:\n  event: \"status-change\"\n```",
+          "domainId": "claim_webhook_notify",
+          "language": "en"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "852",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:10 GMT",
+              "modal-function-call-id": "fc-01KWC8DFGEB98WRZ574ZMM44PN",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      },
+      {
+        "cost": 0,
+        "gradingResult": {
+          "pass": true,
+          "score": 1,
+          "reason": "All assertions passed",
+          "namedScores": {},
+          "tokensUsed": {
+            "total": 0,
+            "prompt": 0,
+            "completion": 0,
+            "cached": 0,
+            "numRequests": 0
+          },
+          "componentResults": [
+            {
+              "pass": true,
+              "score": 1,
+              "reason": "Valid MDMA document (0 warnings, 0 info)",
+              "assertion": {
+                "type": "javascript",
+                "value": "file://assertions/validate-mdma.mjs",
+                "config": {
+                  "exclude": [
+                    "flow-ordering"
+                  ]
+                }
+              }
+            }
+          ]
+        },
+        "id": "70ae7671-973f-4a40-8223-f1e44cdcc5e2",
+        "latencyMs": 769,
+        "namedScores": {},
+        "prompt": {
+          "raw": "[{\"role\":\"system\",\"content\":\"You generate MDMA (Markdown Document with Mounted Applications) documents. Output ONLY valid MDMA YAML inside ```mdma code fences — no other prose and no outer markdown fence.\\n\\nEach ```mdma block defines exactly ONE component as top-level YAML keys (type, id, ...). Never wrap a single component in a \\\"components:\\\" array.\\n\\nYour entire response must contain AT MOST ONE interactive component (form, button, tasklist, approval-gate, or webhook). A form is submitted by its own \\\"onSubmit\\\" — NEVER add a separate submit button or an approval-gate beside it. Non-interactive components (callout, table, chart) may accompany it. Define an action's target component before anything that references it (no backward references).\\n\\nEvery component requires \\\"id\\\" and \\\"type\\\". \\\"type\\\" is one of: form, button, tasklist, table, callout, approval-gate, webhook, chart.\\n\\nComponent rules:\\n- form: requires \\\"onSubmit: <action-id>\\\" (a string). \\\"fields\\\" is a list; each field needs \\\"name\\\", \\\"type\\\", \\\"label\\\". Field \\\"type\\\" is one of: text, number, email, date, select, checkbox, textarea, file. A \\\"select\\\" field requires \\\"options\\\" (list of {label, value}). Mark every PII field (email, phone, name, address, SSN, date-of-birth, etc.) 
with \\\"sensitive: true\\\".\\n- button: requires \\\"text\\\" and \\\"onAction: <action-id>\\\".\\n- tasklist: \\\"items\\\" is a list of {id, text}.\\n- table: \\\"columns\\\" is a list of {key, header}; \\\"data\\\" is an array of row objects.\\n- callout: requires \\\"content\\\" (string); \\\"variant\\\" is one of info, warning, error, success.\\n- approval-gate: requires \\\"title\\\".\\n- webhook: requires \\\"url\\\" and \\\"trigger: <action-id>\\\".\\n- chart: use \\\"label\\\" for the title (never \\\"title\\\"); \\\"data: |\\\" is a CSV multiline string whose first line is comma-separated headers and following lines are comma-separated values; \\\"variant\\\" is one of line, bar, area, pie.\\n\\nNever use a bare \\\"action\\\" key. Forms use \\\"onSubmit\\\", buttons use \\\"onAction\\\", webhooks use \\\"trigger\\\".\\n\\nSelect options are STRINGS — quote both \\\"label\\\" AND \\\"value\\\" when they look numeric or boolean (label: \\\"0\\\", value: \\\"0\\\", never label: 0).\\nQuote any YAML scalar that is, or starts with, a special character (>, <, |, &, *, !, %, @, ?) so it parses, e.g. unit: \\\"%\\\", range: \\\"> 40 mg/dL\\\".\"},{\"role\":\"user\",\"content\":\"callout#rebook-processing(text=\\\"Przetwarzanie zmiany\\\")\\nwebhook#ticketing\\n@lang: pl\"}]",
+          "label": "own-model/prompt.mjs",
+          "config": {}
+        },
+        "promptId": "e2760a156aeab610ecfdae37f795fe173ba7126fbd74bb54da1622b5f8763b11",
+        "promptIdx": 0,
+        "provider": {
+          "id": "openai:chat:mdma-26b",
+          "label": ""
+        },
+        "response": {
+          "output": "```mdma\nid: rebook-processing\ntype: callout\nvariant: info\ncontent: Przetwarzanie zmiany\n```\n\n```mdma\nid: ticketing\ntype: webhook\nurl: https://api.example.com/rebook/v1/ticket\ntrigger: trigger-rebook-notification\n```",
+          "tokenUsage": {
+            "total": 738,
+            "prompt": 667,
+            "completion": 71,
+            "numRequests": 1
+          },
+          "cached": false,
+          "latencyMs": 769,
+          "finishReason": "stop",
+          "guardrails": {
+            "flagged": false
+          },
+          "metadata": {
+            "http": {
+              "status": 200,
+              "statusText": "OK",
+              "headers": {
+                "alt-svc": "h3=\":443\"; ma=2592000",
+                "content-length": "856",
+                "content-type": "application/json",
+                "date": "Tue, 30 Jun 2026 12:35:11 GMT",
+                "modal-function-call-id": "fc-01KWC8DG9JB5V6D17C7JDDHJGG",
+                "vary": "accept-encoding"
+              }
+            }
+          }
+        },
+        "score": 1,
+        "success": true,
+        "testCase": {
+          "description": "hold_flight_rebook_webhook_pl",
+          "vars": {
+            "request": "callout#rebook-processing(text=\"Przetwarzanie zmiany\")\nwebhook#ticketing\n@lang: pl",
+            "expected_mdma": "```mdma\nid: rebook-processing\ntype: callout\nvariant: info\ntitle: \"Przetwarzanie zmiany\"\ncontent: \"Wysyłanie zmiany rezerwacji do systemu biletowego.\"\ndismissible: false\n```\n\n```mdma\nid: ticketing\ntype: webhook\nurl: \"https://ticketing.example.com/api/rebook\"\nmethod: POST\ntrigger: submit-change\nbody:\n  action: \"rebook\"\n```",
+            "domainId": "flight_change",
+            "language": "pl"
+          },
+          "assert": [
+            {
+              "type": "javascript",
+              "value": "file://assertions/validate-mdma.mjs",
+              "config": {
+                "exclude": [
+                  "flow-ordering"
+                ]
+              }
+            }
+          ],
+          "options": {},
+          "metadata": {}
+        },
+        "testIdx": 94,
+        "vars": {
+          "request": "callout#rebook-processing(text=\"Przetwarzanie zmiany\")\nwebhook#ticketing\n@lang: pl",
+          "expected_mdma": "```mdma\nid: rebook-processing\ntype: callout\nvariant: info\ntitle: \"Przetwarzanie zmiany\"\ncontent: \"Wysyłanie zmiany rezerwacji do systemu biletowego.\"\ndismissible: false\n```\n\n```mdma\nid: ticketing\ntype: webhook\nurl: \"https://ticketing.example.com/api/rebook\"\nmethod: POST\ntrigger: submit-change\nbody:\n  action: \"rebook\"\n```",
+          "domainId": "flight_change",
+          "language": "pl"
+        },
+        "metadata": {
+          "http": {
+            "status": 200,
+            "statusText": "OK",
+            "headers": {
+              "alt-svc": "h3=\":443\"; ma=2592000",
+              "content-length": "856",
+              "content-type": "application/json",
+              "date": "Tue, 30 Jun 2026 12:35:11 GMT",
+              "modal-function-call-id": "fc-01KWC8DG9JB5V6D17C7JDDHJGG",
+              "vary": "accept-encoding"
+            }
+          },
+          "_promptfooFileMetadata": {}
+        },
+        "failureReason": 0
+      }
+    ],
+    "stats": {
+      "successes": 95,
+      "failures": 0,
+      "errors": 0,
+      "tokenUsage": {
+        "prompt": 64036,
+        "completion": 12789,
+        "cached": 0,
+        "total": 76825,
+        "numRequests": 95,
+        "completionDetails": {
+          "reasoning": 0,
+          "acceptedPrediction": 0,
+          "rejectedPrediction": 0,
+          "cacheReadInputTokens": 0,
+          "cacheCreationInputTokens": 0
+        },
+        "assertions": {
+          "total": 0,
+          "prompt": 0,
+          "completion": 0,
+          "cached": 0,
+          "numRequests": 0,
+          "completionDetails": {
+            "reasoning": 0,
+            "acceptedPrediction": 0,
+            "rejectedPrediction": 0,
+            "cacheReadInputTokens": 0,
+            "cacheCreationInputTokens": 0
+          }
+        }
+      },
+      "durationMs": 133263,
+      "evaluationDurationMs": 133263
+    }
+  },
+  "config": {
+    "tags": {},
+    "description": "MDMA-IL DSL Holdout Gate — own model",
+    "prompts": [
+      "file:///Users/marcinsadowski/GIT/mr-mdma/evals/own-model/prompt.mjs"
+    ],
+    "providers": [
+      {
+        "id": "openai:chat:mdma-26b",
+        "config": {
+          "apiBaseUrl": "https://REDACTED.modal.run/v1",
+          "apiKey": "[REDACTED]",
+          "temperature": 1,
+          "max_tokens": 2048,
+          "min_p": 0.02,
+          "repetition_penalty": 1.1,
+          "chat_template_kwargs": {
+            "enable_thinking": false
+          }
+        }
+      }
+    ],
+    "tests": [
+      {
+        "description": "reg_generates_a_contact_form_matching_blueprint",
+        "vars": {
+          "request": "form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: contact-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: message\n    type: textarea\n    label: \"Message\"\n    required: true\nonSubmit: contact-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_marks_pii_fields_as_sensitive",
+        "vars": {
+          "request": "form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: employee-pii-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email Address\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: employee-pii-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_includes_a_thinking_block_before_main_content",
+        "vars": {
+          "request": "callout#maintenance-warning(variant=warning)",
+          "expected_mdma": "```mdma\ntype: callout\nid: maintenance-warning\nvariant: warning\ntitle: \"Scheduled Maintenance\"\ncontent: \"Our systems will be undergoing maintenance this weekend from Saturday 10 PM to Sunday 6 AM EST.\"\ndismissible: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_does_not_wrap_output_in_markdown_fences",
+        "vars": {
+          "request": "button#submit-report-btn(text=\"Submit Report\", action=submit-report, variant=primary)",
+          "expected_mdma": "```mdma\ntype: button\nid: submit-report-btn\ntext: \"Submit Report\"\nvariant: primary\nonAction: submit-report\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_select_field_has_options",
+        "vars": {
+          "request": "form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: country-selector-form\nfields:\n  - name: country\n    type: select\n    label: \"Country\"\n    required: true\n    options:\n      - label: \"United States\"\n        value: us\n      - label: \"United Kingdom\"\n        value: uk\n      - label: \"Canada\"\n        value: ca\n      - label: \"Australia\"\n        value: au\n      - label: \"Germany\"\n        value: de\nonSubmit: country-form-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_a_table_component_matching_blueprint",
+        "vars": {
+          "request": "table#languages-table",
+          "expected_mdma": "```mdma\ntype: table\nid: languages-table\nsortable: true\ncolumns:\n  - key: rank\n    header: \"Rank\"\n    sortable: true\n  - key: language\n    header: \"Language\"\n    sortable: true\n  - key: market-share\n    header: \"Market Share (%)\"\n    sortable: true\ndata:\n  - { rank: 1, language: \"Python\", market-share: 28.1 }\n  - { rank: 2, language: \"JavaScript\", market-share: 21.5 }\n  - { rank: 3, language: \"Java\", market-share: 15.3 }\n  - { rank: 4, language: \"C++\", market-share: 10.2 }\n  - { rank: 5, language: \"TypeScript\", market-share: 8.7 }\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_a_bar_chart_matching_blueprint",
+        "vars": {
+          "request": "chart#quarterly-sales(variant=bar)",
+          "expected_mdma": "```mdma\ntype: chart\nid: quarterly-sales\nvariant: bar\nlabel: \"Quarterly Sales\"\ndata: |\n  Quarter, Sales\n  Q1, 120000\n  Q2, 145000\n  Q3, 98000\n  Q4, 167000\nxAxis: Quarter\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_an_approval_gate_matching_blueprint",
+        "vars": {
+          "request": "approval-gate#document-approval",
+          "expected_mdma": "```mdma\ntype: approval-gate\nid: document-approval\ntitle: \"Document Approval\"\ndescription: \"This document requires sign-off from 2 managers before the release can proceed.\"\nrequiredApprovers: 2\nonApprove: proceed-release\nonDeny: reject-release\nrequireReason: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_an_incident_triage_severity_form",
+        "vars": {
+          "request": "form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: severity-form\nfields:\n  - name: title\n    type: text\n    label: \"Incident Title\"\n    required: true\n  - name: reporter-email\n    type: email\n    label: \"Reporter Email\"\n    required: true\n    sensitive: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - label: Critical\n        value: critical\n      - label: High\n        value: high\n      - label: Medium\n        value: medium\n      - label: Low\n        value: low\nonSubmit: triage-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_all_component_ids_are_unique_and_kebab_case",
+        "vars": {
+          "request": "form#settings-form[display-name*:t, email*^:e](action=settings-saved)\ncallout#settings-saved(variant=success)",
+          "expected_mdma": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: display-name\n    type: text\n    label: \"Display Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\nonSubmit: settings-saved\n```\n\n```mdma\ntype: callout\nid: settings-saved\nvariant: success\ncontent: \"Your settings have been saved successfully.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_a_warning_callout_matching_blueprint",
+        "vars": {
+          "request": "callout#password-expiry(variant=warning)",
+          "expected_mdma": "```mdma\ntype: callout\nid: password-expiry\nvariant: warning\ntitle: \"Password Expiring Soon\"\ncontent: \"Your password will expire in 3 days. Please update it to avoid losing access.\"\ndismissible: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_an_error_callout_matching_blueprint",
+        "vars": {
+          "request": "callout#payment-error(variant=error)",
+          "expected_mdma": "```mdma\ntype: callout\nid: payment-error\nvariant: error\ntitle: \"Payment Processing Unavailable\"\ncontent: \"Payment processing is currently unavailable. Please try again later or contact support.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_a_success_callout_matching_blueprint",
+        "vars": {
+          "request": "callout#account-verified(variant=success)",
+          "expected_mdma": "```mdma\ntype: callout\nid: account-verified\nvariant: success\ntitle: \"Account Verified\"\ncontent: \"Your account has been successfully verified. You now have full access to all features.\"\ndismissible: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_a_pie_chart_matching_blueprint",
+        "vars": {
+          "request": "chart#browser-share(variant=pie)",
+          "expected_mdma": "```mdma\ntype: chart\nid: browser-share\nvariant: pie\nlabel: \"Browser Market Share\"\ndata: |\n  Browser, Share\n  Chrome, 65\n  Safari, 18\n  Firefox, 8\n  Edge, 5\n  Other, 4\nxAxis: Browser\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_button_has_a_confirmation_dialog_matching_blueprint",
+        "vars": {
+          "request": "button#delete-account-btn(text=\"Delete Account\", action=delete-account, variant=danger)",
+          "expected_mdma": "```mdma\ntype: button\nid: delete-account-btn\ntext: \"Delete Account\"\nvariant: danger\nonAction: delete-account\nconfirm:\n  title: \"Delete Account?\"\n  message: \"This action is permanent and cannot be undone. All your data will be deleted.\"\n  confirmText: \"Yes, Delete\"\n  cancelText: \"Cancel\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "[REDACTED]",
+        "vars": {
+          "request": "form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)\ncallout#ticket-submitted(variant=success)",
+          "expected_mdma": "```mdma\ntype: form\nid: ticket-form\nfields:\n  - name: subject\n    type: text\n    label: \"Subject\"\n    required: true\n  - name: description\n    type: textarea\n    label: \"Description\"\n    required: true\nonSubmit: ticket-submitted\n```\n\n```mdma\ntype: callout\nid: ticket-submitted\nvariant: success\ncontent: \"Your support ticket has been submitted. We'll get back to you shortly.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "[REDACTED]",
+        "vars": {
+          "request": "table#employee-directory",
+          "expected_mdma": "```mdma\ntype: table\nid: employee-directory\nsortable: true\nfilterable: true\ncolumns:\n  - key: name\n    header: \"Name\"\n    sortable: true\n  - key: department\n    header: \"Department\"\n    sortable: true\n  - key: role\n    header: \"Role\"\n    sortable: true\n  - key: start-date\n    header: \"Start Date\"\n    sortable: true\ndata:\n  - { name: \"Alice Johnson\", department: \"Engineering\", role: \"Senior Developer\", start-date: \"2021-03-15\" }\n  - { name: \"Bob Smith\", department: \"Marketing\", role: \"Campaign Manager\", start-date: \"2022-07-01\" }\n  - { name: \"Carol Davis\", department: \"Engineering\", role: \"Tech Lead\", start-date: \"2020-01-10\" }\n  - { name: \"Dan Wilson\", department: \"Sales\", role: \"Account Executive\", start-date: \"2023-02-20\" }\n  - { name: \"Eve Martinez\", department: \"HR\", role: \"Recruiter\", start-date: \"2022-11-05\" }\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_marks_financial_fields_as_sensitive",
+        "vars": {
+          "request": "form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: payment-form\nfields:\n  - name: cardholder-name\n    type: text\n    label: \"Cardholder Name\"\n    required: true\n  - name: card-number\n    type: text\n    label: \"Credit Card Number\"\n    required: true\n    sensitive: true\n  - name: expiry\n    type: text\n    label: \"Expiration Date\"\n    required: true\n    sensitive: true\n  - name: cvv\n    type: text\n    label: \"CVV\"\n    required: true\n    sensitive: true\n  - name: billing-address\n    type: textarea\n    label: \"Billing Address\"\n    required: true\n    sensitive: true\nonSubmit: payment-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_marks_mandatory_fields_as_required",
+        "vars": {
+          "request": "form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: user-registration\nfields:\n  - name: username\n    type: text\n    label: \"Username\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: password\n    type: text\n    label: \"Password\"\n    required: true\n    sensitive: true\n  - name: bio\n    type: textarea\n    label: \"Bio\"\nonSubmit: registration-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_uses_binding_expressions_across_components",
+        "vars": {
+          "request": "form#settings-form[notifications-enabled:c](action=notification-info)\ncallout#notification-info(variant=info)",
+          "expected_mdma": "```mdma\ntype: form\nid: settings-form\nfields:\n  - name: notifications-enabled\n    type: checkbox\n    label: \"Enable Notifications\"\nonSubmit: notification-info\n```\n\n```mdma\ntype: callout\nid: notification-info\nvariant: info\ntitle: \"Notification Preferences\"\ncontent: \"You will receive email notifications for account activity, security alerts, and weekly digests.\"\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "[REDACTED]",
+        "vars": {
+          "request": "form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: personal-info-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    sensitive: true\n  - name: ssn\n    type: text\n    label: \"Social Security Number\"\n    required: true\n    sensitive: true\nonSubmit: info-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "[REDACTED]",
+        "vars": {
+          "request": "approval-gate#budget-approval",
+          "expected_mdma": "```mdma\ntype: approval-gate\nid: budget-approval\ntitle: \"Budget Approval\"\ndescription: \"All budget requests over $10,000 require approval from authorized personnel.\"\nrequiredApprovers: 3\nallowedRoles:\n  - finance-manager\n  - director\nonApprove: budget-approved\nonDeny: budget-denied\nrequireReason: true\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_marks_medical_and_personal_health_fields_as_sensitive",
+        "vars": {
+          "request": "form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: patient-intake\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: dob\n    type: date\n    label: \"Date of Birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    sensitive: true\n  - name: phone\n    type: text\n    label: \"Phone Number\"\n    sensitive: true\n  - name: medical-record\n    type: text\n    label: \"Medical Record Number\"\n    required: true\n    sensitive: true\n  - name: medications\n    type: textarea\n    label: \"Current Medications\"\n    sensitive: true\n  - name: allergies\n    type: textarea\n    label: \"Known Allergies\"\n    sensitive: true\nonSubmit: patient-intake-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_multiple_chart_types_in_a_dashboard",
+        "vars": {
+          "request": "chart#monthly-revenue(variant=bar)\nchart#category-breakdown(variant=pie)\ntable#region-summary",
+          "expected_mdma": "```mdma\ntype: chart\nid: monthly-revenue\nvariant: bar\nlabel: \"Monthly Revenue\"\ndata: |\n  Month, Revenue\n  January, 85000\n  February, 92000\n  March, 78000\n  April, 105000\n  May, 99000\n  June, 115000\nxAxis: Month\n```\n\n```mdma\ntype: chart\nid: category-breakdown\nvariant: pie\nlabel: \"Revenue by Category\"\ndata: |\n  Category, Revenue\n  Electronics, 180000\n  Clothing, 120000\n  Food, 95000\n  Services, 79000\nxAxis: Category\n```\n\n```mdma\ntype: table\nid: region-summary\nsortable: true\ncolumns:\n  - key: region\n    header: \"Region\"\n    sortable: true\n  - key: revenue\n    header: \"Revenue ($)\"\n    sortable: true\n  - key: growth\n    header: \"Growth (%)\"\n    sortable: true\ndata:\n  - { region: \"North America\", revenue: 250000, growth: 12.5 }\n  - { region: \"Europe\", revenue: 180000, growth: 8.3 }\n  - { region: \"Asia Pacific\", revenue: 145000, growth: 22.1 }\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_generates_a_form_with_a_basic_file_upload_field",
+        "vars": {
+          "request": "form#resume-form[full-name*:t, resume*:f](action=resume-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: resume-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: resume\n    type: file\n    label: \"Resume\"\n    required: true\nonSubmit: resume-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_marks_a_sensitive_file_upload_passport_as_sensitive",
+        "vars": {
+          "request": "form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: kyc-form\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n    sensitive: true\n  - name: passport-scan\n    type: file\n    label: \"Passport Scan\"\n    required: true\n    sensitive: true\nonSubmit: kyc-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "reg_preserves_a_specific_component_id_requested_by_the_user",
+        "vars": {
+          "request": "form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)",
+          "expected_mdma": "```mdma\ntype: form\nid: devcon-2026-registration\nfields:\n  - name: full-name\n    type: text\n    label: \"Full Name\"\n    required: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\nonSubmit: devcon-registration-submitted\n```",
+          "domainId": "regression",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_medical_intake_en",
+        "vars": {
+          "request": "callout#consent-notice(text=\"Consent to treatment\")\nform#intake-form[full_name*:t, date_of_birth*^:d, email*^:e, visit_reason*:ta](action=submit-intake)",
+          "expected_mdma": "```mdma\nid: consent-notice\ntype: callout\nvariant: info\ntitle: \"Consent to treatment\"\ncontent: \"By submitting this form you consent to be treated at this clinic and confirm the information is accurate.\"\ndismissible: false\n```\n\n```mdma\nid: intake-form\ntype: form\nonSubmit: submit-intake\nfields:\n  - name: full_name\n    type: text\n    label: \"Full name\"\n    required: true\n  - name: date_of_birth\n    type: date\n    label: \"Date of birth\"\n    required: true\n    sensitive: true\n  - name: email\n    type: email\n    label: \"Email\"\n    required: true\n    sensitive: true\n  - name: visit_reason\n    type: textarea\n    label: \"Reason for visit\"\n    required: true\n```",
+          "domainId": "medical_intake_clinic",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_prescription_refill_pl",
+        "vars": {
+          "request": "form#refill-form[medication*:t, dosage*:t, pharmacy*:s{centrum|stare-miasto|dworzec}, last_fill:d](action=request-refill)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: refill-form\ntype: form\nonSubmit: request-refill\nfields:\n  - name: medication\n    type: text\n    label: \"Nazwa leku\"\n    required: true\n  - name: dosage\n    type: text\n    label: \"Dawka\"\n    required: true\n  - name: pharmacy\n    type: select\n    label: \"Apteka\"\n    required: true\n    options:\n      - { label: \"Centrum\", value: \"centrum\" }\n      - { label: \"Stare Miasto\", value: \"stare-miasto\" }\n      - { label: \"Dworzec\", value: \"dworzec\" }\n  - name: last_fill\n    type: date\n    label: \"Data ostatniej realizacji\"\n```",
+          "domainId": "prescription_refill",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_symptom_checker_en",
+        "vars": {
+          "request": "callout#emergency-disclaimer(text=\"Not for emergencies\")\nform#triage-form[main_symptom*:t, severity*:s{mild|moderate|severe}, duration_days*:n](action=submit-triage)",
+          "expected_mdma": "```mdma\nid: emergency-disclaimer\ntype: callout\nvariant: warning\ntitle: \"Not for emergencies\"\ncontent: \"If this is a medical emergency call your local emergency number now. This tool does not provide a diagnosis.\"\ndismissible: false\n```\n\n```mdma\nid: triage-form\ntype: form\nonSubmit: submit-triage\nfields:\n  - name: main_symptom\n    type: text\n    label: \"Main symptom\"\n    required: true\n  - name: severity\n    type: select\n    label: \"Severity\"\n    required: true\n    options:\n      - { label: \"Mild\", value: \"mild\" }\n      - { label: \"Moderate\", value: \"moderate\" }\n      - { label: \"Severe\", value: \"severe\" }\n  - name: duration_days\n    type: number\n    label: \"Days affected\"\n    required: true\n```",
+          "domainId": "symptom_checker_intake",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_lab_results_table_en",
+        "vars": {
+          "request": "callout#range-note(text=\"Reference ranges\")\ntable#results-table",
+          "expected_mdma": "```mdma\nid: range-note\ntype: callout\nvariant: info\ntitle: \"Reference ranges\"\ncontent: \"Values outside the reference range are flagged. Discuss any concerns with your physician.\"\ndismissible: true\n```\n\n```mdma\nid: results-table\ntype: table\ncolumns:\n  - { key: test, header: \"Test\" }\n  - { key: value, header: \"Value\" }\n  - { key: range, header: \"Reference range\" }\n  - { key: flag, header: \"Flag\" }\ndata:\n  - { test: \"Hemoglobin\", value: \"13.8 g/dL\", range: \"13.5-17.5\", flag: \"normal\" }\n  - { test: \"Glucose\", value: \"112 mg/dL\", range: \"70-99\", flag: \"high\" }\n```",
+          "domainId": "lab_results_review",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_lab_results_chart_en",
+        "vars": {
+          "request": "chart#glucose-trend(variant=line)",
+          "expected_mdma": "```mdma\nid: glucose-trend\ntype: chart\nvariant: line\ndata: \"{{labs.glucose_history}}\"\nxAxis: date\nyAxis: value\n```",
+          "domainId": "lab_results_review",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_appointment_booking_pl",
+        "vars": {
+          "request": "callout#cancel-policy(text=\"Odwołania\")\nform#booking-form[specialty*:s{kardiolog|dermatolog|ortopeda}, preferred_date*:d, insurance_number*^:t](action=book-appointment)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: cancel-policy\ntype: callout\nvariant: info\ntitle: \"Odwołania\"\ncontent: \"Wizytę można bezpłatnie odwołać najpóźniej 24 godziny przed terminem.\"\ndismissible: true\n```\n\n```mdma\nid: booking-form\ntype: form\nonSubmit: book-appointment\nfields:\n  - name: specialty\n    type: select\n    label: \"Specjalizacja\"\n    required: true\n    options:\n      - { label: \"Kardiolog\", value: \"kardiolog\" }\n      - { label: \"Dermatolog\", value: \"dermatolog\" }\n      - { label: \"Ortopeda\", value: \"ortopeda\" }\n  - name: preferred_date\n    type: date\n    label: \"Preferowana data\"\n    required: true\n  - name: insurance_number\n    type: text\n    label: \"Numer ubezpieczenia\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "appointment_booking",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_vaccination_record_en",
+        "vars": {
+          "request": "form#vaccine-form[vaccine_type*:s{influenza|tetanus|covid-19}, date_administered*:d, batch_number*:t](action=save-vaccination)",
+          "expected_mdma": "```mdma\nid: vaccine-form\ntype: form\nonSubmit: save-vaccination\nfields:\n  - name: vaccine_type\n    type: select\n    label: \"Vaccine\"\n    required: true\n    options:\n      - { label: \"Influenza\", value: \"influenza\" }\n      - { label: \"Tetanus\", value: \"tetanus\" }\n      - { label: \"COVID-19\", value: \"covid-19\" }\n  - name: date_administered\n    type: date\n    label: \"Date administered\"\n    required: true\n  - name: batch_number\n    type: text\n    label: \"Batch number\"\n    required: true\n```",
+          "domainId": "vaccination_record",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_clinical_trial_consent_en",
+        "vars": {
+          "request": "callout#consent-info(text=\"Informed consent\")\napproval-gate#investigator-signoff(text=\"Investigator sign-off\")",
+          "expected_mdma": "```mdma\nid: consent-info\ntype: callout\nvariant: warning\ntitle: \"Informed consent\"\ncontent: \"Read the full study information sheet. Participation is voluntary and you may withdraw at any time.\"\ndismissible: false\n```\n\n```mdma\nid: investigator-signoff\ntype: approval-gate\ntitle: \"Investigator sign-off\"\ndescription: \"A principal investigator must confirm eligibility before enrollment.\"\nrequiredApprovers: 1\nallowedRoles: [investigator]\nonApprove: enroll-participant\nonDeny: decline-enrollment\nrequireReason: true\n```",
+          "domainId": "clinical_trial_consent",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_telehealth_followup_pl",
+        "vars": {
+          "request": "form#followup-form[pain_scale*:s{0|3|7}, adherence:c, notes:ta](action=submit-followup)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: followup-form\ntype: form\nonSubmit: submit-followup\nfields:\n  - name: pain_scale\n    type: select\n    label: \"Skala bólu\"\n    required: true\n    options:\n      - { label: \"Brak\", value: \"0\" }\n      - { label: \"Łagodny\", value: \"3\" }\n      - { label: \"Silny\", value: \"7\" }\n  - name: adherence\n    type: checkbox\n    label: \"Przyjmuję leki zgodnie z zaleceniami\"\n  - name: notes\n    type: textarea\n    label: \"Uwagi\"\n```",
+          "domainId": "telehealth_followup",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_kyc_basic_en",
+        "vars": {
+          "request": "callout#kyc-notice(text=\"Identity verification\")\nform#kyc-form[legal_name*:t, national_id*^:t, id_document*^:f](action=submit-kyc)",
+          "expected_mdma": "```mdma\nid: kyc-notice\ntype: callout\nvariant: info\ntitle: \"Identity verification\"\ncontent: \"Your national ID and document are stored securely and used only for compliance review.\"\ndismissible: false\n```\n\n```mdma\nid: kyc-form\ntype: form\nonSubmit: submit-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: \"Legal name\"\n    required: true\n  - name: national_id\n    type: text\n    label: \"National ID number\"\n    required: true\n    sensitive: true\n  - name: id_document\n    type: file\n    label: \"Photo ID upload\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "kyc_basic",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_loan_application_pl",
+        "vars": {
+          "request": "callout#apr-disclosure(text=\"RRSO\")\nform#loan-form[amount*:n, monthly_income*^:n, purpose*:s{konsolidacja|remont|inny}](action=submit-loan)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: apr-disclosure\ntype: callout\nvariant: info\ntitle: \"RRSO\"\ncontent: \"Rzeczywista roczna stopa oprocentowania zależy od oceny zdolności kredytowej i zostanie przedstawiona przed podpisaniem umowy.\"\ndismissible: false\n```\n\n```mdma\nid: loan-form\ntype: form\nonSubmit: submit-loan\nfields:\n  - name: amount\n    type: number\n    label: \"Kwota pożyczki\"\n    required: true\n  - name: monthly_income\n    type: number\n    label: \"Miesięczny dochód\"\n    required: true\n    sensitive: true\n  - name: purpose\n    type: select\n    label: \"Cel\"\n    required: true\n    options:\n      - { label: \"Konsolidacja\", value: \"konsolidacja\" }\n      - { label: \"Remont\", value: \"remont\" }\n      - { label: \"Inny\", value: \"inny\" }\n```",
+          "domainId": "loan_application",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_expense_report_en",
+        "vars": {
+          "request": "form#expense-form[category*:s{travel|meals|software}, total*:n](action=submit-expense)\ntable#line-items",
+          "expected_mdma": "```mdma\nid: expense-form\ntype: form\nonSubmit: submit-expense\nfields:\n  - name: category\n    type: select\n    label: \"Category\"\n    required: true\n    options:\n      - { label: \"Travel\", value: \"travel\" }\n      - { label: \"Meals\", value: \"meals\" }\n      - { label: \"Software\", value: \"software\" }\n  - name: total\n    type: number\n    label: \"Total amount\"\n    required: true\n```\n\n```mdma\nid: line-items\ntype: table\ncolumns:\n  - { key: item, header: \"Item\" }\n  - { key: date, header: \"Date\" }\n  - { key: amount, header: \"Amount\" }\ndata:\n  - { item: \"Flight\", date: \"2026-05-02\", amount: 320 }\n  - { item: \"Hotel\", date: \"2026-05-03\", amount: 180 }\n```",
+          "domainId": "expense_report",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_wire_transfer_en",
+        "vars": {
+          "request": "approval-gate#dual-approval(text=\"Dual approval\")\nwebhook#payment-rail",
+          "expected_mdma": "```mdma\nid: dual-approval\ntype: approval-gate\ntitle: \"Dual approval\"\ndescription: \"Two authorized signers must approve outbound international transfers.\"\nrequiredApprovers: 2\nallowedRoles: [treasury]\nonApprove: release-wire\nonDeny: cancel-wire\nrequireReason: true\n```\n\n```mdma\nid: payment-rail\ntype: webhook\nurl: \"https://payments.example.com/api/v1/wire\"\nmethod: POST\ntrigger: release-wire\nbody:\n  reference: \"wire-transfer\"\n```",
+          "domainId": "wire_transfer_setup",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_budget_dashboard_pl",
+        "vars": {
+          "request": "callout#overspend-warning(text=\"Przekroczenie budżetu\")\nchart#spend-by-category(variant=pie)\ntable#transactions\n@lang: pl",
+          "expected_mdma": "```mdma\nid: overspend-warning\ntype: callout\nvariant: warning\ntitle: \"Przekroczenie budżetu\"\ncontent: \"Wydatki w kategorii Rozrywka przekroczyły zaplanowany limit o 15%.\"\ndismissible: false\n```\n\n```mdma\nid: spend-by-category\ntype: chart\nvariant: pie\ndata: \"{{budget.by_category}}\"\n```\n\n```mdma\nid: transactions\ntype: table\ncolumns:\n  - { key: merchant, header: \"Sprzedawca\" }\n  - { key: category, header: \"Kategoria\" }\n  - { key: amount, header: \"Kwota\" }\ndata:\n  - { merchant: \"Biedronka\", category: \"Spożywcze\", amount: 142 }\n  - { merchant: \"Netflix\", category: \"Rozrywka\", amount: 43 }\n```",
+          "domainId": "budget_dashboard",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_card_dispute_en",
+        "vars": {
+          "request": "callout#timeline-note(text=\"What happens next\")\nform#dispute-form[transaction_id*:t, reason*:s{unauthorized|duplicate|not-received}, evidence:f](action=submit-dispute)",
+          "expected_mdma": "```mdma\nid: timeline-note\ntype: callout\nvariant: info\ntitle: \"What happens next\"\ncontent: \"Disputes are typically resolved within 10 business days. A provisional credit may be issued while we investigate.\"\ndismissible: false\n```\n\n```mdma\nid: dispute-form\ntype: form\nonSubmit: submit-dispute\nfields:\n  - name: transaction_id\n    type: text\n    label: \"Transaction ID\"\n    required: true\n  - name: reason\n    type: select\n    label: \"Reason\"\n    required: true\n    options:\n      - { label: \"Unauthorized\", value: \"unauthorized\" }\n      - { label: \"Duplicate charge\", value: \"duplicate\" }\n      - { label: \"Item not received\", value: \"not-received\" }\n  - name: evidence\n    type: file\n    label: \"Supporting evidence\"\n```",
+          "domainId": "card_dispute",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_crypto_onboarding_en",
+        "vars": {
+          "request": "callout#risk-disclosure(text=\"Risk disclosure\")\nform#crypto-form[tax_id*^:t, source_of_funds*:s{salary|savings|business}](action=submit-onboarding)",
+          "expected_mdma": "```mdma\nid: risk-disclosure\ntype: callout\nvariant: warning\ntitle: \"Risk disclosure\"\ncontent: \"Crypto assets are volatile and you may lose your entire investment. Only invest what you can afford to lose.\"\ndismissible: false\n```\n\n```mdma\nid: crypto-form\ntype: form\nonSubmit: submit-onboarding\nfields:\n  - name: tax_id\n    type: text\n    label: \"Tax identification number\"\n    required: true\n    sensitive: true\n  - name: source_of_funds\n    type: select\n    label: \"Source of funds\"\n    required: true\n    options:\n      - { label: \"Salary\", value: \"salary\" }\n      - { label: \"Savings\", value: \"savings\" }\n      - { label: \"Business income\", value: \"business\" }\n```",
+          "domainId": "crypto_onboarding",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_invoice_create_en",
+        "vars": {
+          "request": "form#invoice-form[client_email*^:e, due_date*:d, tax_rate*:s{0|8|23}](action=create-invoice)\ntable#invoice-items",
+          "expected_mdma": "```mdma\nid: invoice-form\ntype: form\nonSubmit: create-invoice\nfields:\n  - name: client_email\n    type: email\n    label: \"Client email\"\n    required: true\n    sensitive: true\n  - name: due_date\n    type: date\n    label: \"Due date\"\n    required: true\n  - name: tax_rate\n    type: select\n    label: \"Tax rate\"\n    required: true\n    options:\n      - { label: \"0%\", value: \"0\" }\n      - { label: \"8%\", value: \"8\" }\n      - { label: \"23%\", value: \"23\" }\n```\n\n```mdma\nid: invoice-items\ntype: table\ncolumns:\n  - { key: description, header: \"Description\" }\n  - { key: hours, header: \"Hours\" }\n  - { key: rate, header: \"Rate\" }\ndata:\n  - { description: \"UI design\", hours: 12, rate: 90 }\n  - { description: \"Frontend build\", hours: 20, rate: 95 }\n```",
+          "domainId": "invoice_create",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_employee_onboarding_en",
+        "vars": {
+          "request": "callout#onboarding-welcome(text=\"Welcome aboard\")\ntasklist#onboarding-tasks(action=complete-onboarding)",
+          "expected_mdma": "```mdma\nid: onboarding-welcome\ntype: callout\nvariant: success\ntitle: \"Welcome aboard\"\ncontent: \"Work through each item below to finish your first-day setup.\"\ndismissible: false\n```\n\n```mdma\nid: onboarding-tasks\ntype: tasklist\nonComplete: complete-onboarding\nitems:\n  - id: sign-contract\n    text: \"Sign employment contract\"\n  - id: setup-email\n    text: \"Set up company email\"\n  - id: read-handbook\n    text: \"Read the employee handbook\"\n```",
+          "domainId": "employee_onboarding",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_leave_request_pl",
+        "vars": {
+          "request": "form#leave-form[leave_type*:s{wypoczynkowy|na-zadanie|bezplatny}, start_date*:d, end_date*:d, note:ta](action=submit-leave)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: leave-form\ntype: form\nonSubmit: submit-leave\nfields:\n  - name: leave_type\n    type: select\n    label: \"Rodzaj urlopu\"\n    required: true\n    options:\n      - { label: \"Wypoczynkowy\", value: \"wypoczynkowy\" }\n      - { label: \"Na żądanie\", value: \"na-zadanie\" }\n      - { label: \"Bezpłatny\", value: \"bezplatny\" }\n  - name: start_date\n    type: date\n    label: \"Data rozpoczęcia\"\n    required: true\n  - name: end_date\n    type: date\n    label: \"Data zakończenia\"\n    required: true\n  - name: note\n    type: textarea\n    label: \"Uzasadnienie\"\n```",
+          "domainId": "leave_request",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_performance_review_en",
+        "vars": {
+          "request": "callout#confidentiality(text=\"Confidential\")\nform#review-form[delivery*:s{below|meets|exceeds}, collaboration*:s{below|meets|exceeds}, summary*:ta](action=submit-review)",
+          "expected_mdma": "```mdma\nid: confidentiality\ntype: callout\nvariant: info\ntitle: \"Confidential\"\ncontent: \"Your self-assessment is shared only with your manager and HR.\"\ndismissible: false\n```\n\n```mdma\nid: review-form\ntype: form\nonSubmit: submit-review\nfields:\n  - name: delivery\n    type: select\n    label: \"Delivery\"\n    required: true\n    options:\n      - { label: \"Below\", value: \"below\" }\n      - { label: \"Meets\", value: \"meets\" }\n      - { label: \"Exceeds\", value: \"exceeds\" }\n  - name: collaboration\n    type: select\n    label: \"Collaboration\"\n    required: true\n    options:\n      - { label: \"Below\", value: \"below\" }\n      - { label: \"Meets\", value: \"meets\" }\n      - { label: \"Exceeds\", value: \"exceeds\" }\n  - name: summary\n    type: textarea\n    label: \"Summary\"\n    required: true\n```",
+          "domainId": "performance_review",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_expense_reimbursement_pl",
+        "vars": {
+          "request": "form#reimb-form[amount*:n, per_diem*:s{krajowa|zagraniczna}, receipt*:f](action=submit-reimbursement)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: reimb-form\ntype: form\nonSubmit: submit-reimbursement\nfields:\n  - name: amount\n    type: number\n    label: \"Kwota\"\n    required: true\n  - name: per_diem\n    type: select\n    label: \"Dieta\"\n    required: true\n    options:\n      - { label: \"Krajowa\", value: \"krajowa\" }\n      - { label: \"Zagraniczna\", value: \"zagraniczna\" }\n  - name: receipt\n    type: file\n    label: \"Paragon\"\n    required: true\n```",
+          "domainId": "expense_reimbursement_hr",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_headcount_dashboard_en",
+        "vars": {
+          "request": "callout#freeze-note(text=\"Hiring freeze\")\nchart#headcount-by-dept(variant=bar)\ntable#attrition",
+          "expected_mdma": "```mdma\nid: freeze-note\ntype: callout\nvariant: warning\ntitle: \"Hiring freeze\"\ncontent: \"A hiring freeze is in effect for non-critical roles through the end of the quarter.\"\ndismissible: false\n```\n\n```mdma\nid: headcount-by-dept\ntype: chart\nvariant: bar\ndata: \"{{hr.headcount_by_department}}\"\nxAxis: department\nyAxis: count\n```\n\n```mdma\nid: attrition\ntype: table\ncolumns:\n  - { key: department, header: \"Department\" }\n  - { key: attrition, header: \"Attrition %\" }\ndata:\n  - { department: \"Engineering\", attrition: \"6%\" }\n  - { department: \"Sales\", attrition: \"11%\" }\n```",
+          "domainId": "headcount_dashboard",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_offboarding_checklist_en",
+        "vars": {
+          "request": "tasklist#offboarding-tasks(action=complete-offboarding)",
+          "expected_mdma": "```mdma\nid: offboarding-tasks\ntype: tasklist\nonComplete: complete-offboarding\nitems:\n  - id: return-laptop\n    text: \"Return company laptop\"\n  - id: revoke-access\n    text: \"Revoke system access\"\n  - id: exit-interview\n    text: \"Complete exit interview\"\n```",
+          "domainId": "offboarding_checklist",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_insurance_claim_auto_pl",
+        "vars": {
+          "request": "callout#fraud-warning(text=\"Ostrzeżenie\")\nform#claim-form[policy_number*^:t, incident_date*:d, photos*:f](action=submit-claim)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: fraud-warning\ntype: callout\nvariant: error\ntitle: \"Ostrzeżenie\"\ncontent: \"Podanie nieprawdziwych informacji w zgłoszeniu szkody może skutkować odpowiedzialnością karną.\"\ndismissible: false\n```\n\n```mdma\nid: claim-form\ntype: form\nonSubmit: submit-claim\nfields:\n  - name: policy_number\n    type: text\n    label: \"Numer polisy\"\n    required: true\n    sensitive: true\n  - name: incident_date\n    type: date\n    label: \"Data zdarzenia\"\n    required: true\n  - name: photos\n    type: file\n    label: \"Zdjęcia uszkodzeń\"\n    required: true\n```",
+          "domainId": "insurance_claim_auto",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_policy_update_en",
+        "vars": {
+          "request": "form#policy-form[coverage*:s{basic|standard|premium}, effective_date*:d](action=submit-policy-update)",
+          "expected_mdma": "```mdma\nid: policy-form\ntype: form\nonSubmit: submit-policy-update\nfields:\n  - name: coverage\n    type: select\n    label: \"Coverage level\"\n    required: true\n    options:\n      - { label: \"Basic\", value: \"basic\" }\n      - { label: \"Standard\", value: \"standard\" }\n      - { label: \"Premium\", value: \"premium\" }\n  - name: effective_date\n    type: date\n    label: \"Effective date\"\n    required: true\n```",
+          "domainId": "policy_update",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_beneficiary_change_pl",
+        "vars": {
+          "request": "callout#legal-notice(text=\"Informacja prawna\")\nform#beneficiary-form[beneficiary_name*^:t, relationship*:s{malzonek|dziecko|inne}, share_percent*:n](action=submit-beneficiary)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: legal-notice\ntype: callout\nvariant: info\ntitle: \"Informacja prawna\"\ncontent: \"Zmiana uposażonego wchodzi w życie po weryfikacji tożsamości i podpisaniu wniosku.\"\ndismissible: false\n```\n\n```mdma\nid: beneficiary-form\ntype: form\nonSubmit: submit-beneficiary\nfields:\n  - name: beneficiary_name\n    type: text\n    label: \"Imię i nazwisko uposażonego\"\n    required: true\n    sensitive: true\n  - name: relationship\n    type: select\n    label: \"Pokrewieństwo\"\n    required: true\n    options:\n      - { label: \"Małżonek\", value: \"malzonek\" }\n      - { label: \"Dziecko\", value: \"dziecko\" }\n      - { label: \"Inne\", value: \"inne\" }\n  - name: share_percent\n    type: number\n    label: \"Udział (%)\"\n    required: true\n```",
+          "domainId": "beneficiary_change",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_claims_table_en",
+        "vars": {
+          "request": "table#open-claims\nbutton#assign-next(text=\"Assign next claim\", action=assign-claim, variant=primary)",
+          "expected_mdma": "```mdma\nid: open-claims\ntype: table\ncolumns:\n  - { key: claim_id, header: \"Claim\" }\n  - { key: type, header: \"Type\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { claim_id: \"CLM-1042\", type: \"Auto\", status: \"In review\" }\n  - { claim_id: \"CLM-1043\", type: \"Home\", status: \"Pending docs\" }\n```\n\n```mdma\nid: assign-next\ntype: button\ntext: \"Assign next claim\"\nvariant: primary\nonAction: assign-claim\n```",
+          "domainId": "claims_dashboard",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_claims_chart_en",
+        "vars": {
+          "request": "chart#claims-by-status(variant=bar)",
+          "expected_mdma": "```mdma\nid: claims-by-status\ntype: chart\nvariant: bar\ndata: \"{{claims.by_status}}\"\nxAxis: status\nyAxis: count\n```",
+          "domainId": "claims_dashboard",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_quote_request_pl",
+        "vars": {
+          "request": "form#quote-form[property_type*:s{dom|mieszkanie}, build_year*:n, sum_insured*:n](action=request-quote)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: quote-form\ntype: form\nonSubmit: request-quote\nfields:\n  - name: property_type\n    type: select\n    label: \"Typ nieruchomości\"\n    required: true\n    options:\n      - { label: \"Dom\", value: \"dom\" }\n      - { label: \"Mieszkanie\", value: \"mieszkanie\" }\n  - name: build_year\n    type: number\n    label: \"Rok budowy\"\n    required: true\n  - name: sum_insured\n    type: number\n    label: \"Suma ubezpieczenia\"\n    required: true\n```",
+          "domainId": "quote_request",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_claim_webhook_notify_en",
+        "vars": {
+          "request": "callout#audit-note(text=\"Audit\")\nform#status-form[claim_id*:t, status*:s{approved|denied|more-info}](action=update-status)",
+          "expected_mdma": "```mdma\nid: audit-note\ntype: callout\nvariant: info\ntitle: \"Audit\"\ncontent: \"Every status change is written to the immutable audit log.\"\ndismissible: false\n```\n\n```mdma\nid: status-form\ntype: form\nonSubmit: update-status\nfields:\n  - name: claim_id\n    type: text\n    label: \"Claim ID\"\n    required: true\n  - name: status\n    type: select\n    label: \"New status\"\n    required: true\n    options:\n      - { label: \"Approved\", value: \"approved\" }\n      - { label: \"Denied\", value: \"denied\" }\n      - { label: \"More info needed\", value: \"more-info\" }\n```",
+          "domainId": "claim_webhook_notify",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_ecommerce_return_en",
+        "vars": {
+          "request": "form#return-form[order_id*:t, reason*:s{damaged|wrong-item|no-longer-needed}, prefer_exchange:c](action=submit-return)",
+          "expected_mdma": "```mdma\nid: return-form\ntype: form\nonSubmit: submit-return\nfields:\n  - name: order_id\n    type: text\n    label: \"Order number\"\n    required: true\n  - name: reason\n    type: select\n    label: \"Reason for return\"\n    required: true\n    options:\n      - { label: \"Damaged\", value: \"damaged\" }\n      - { label: \"Wrong item\", value: \"wrong-item\" }\n      - { label: \"No longer needed\", value: \"no-longer-needed\" }\n  - name: prefer_exchange\n    type: checkbox\n    label: \"I would prefer an exchange\"\n```",
+          "domainId": "ecommerce_return",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_custom_order_en",
+        "vars": {
+          "request": "callout#lead-time(text=\"Lead time\")\nform#custom-form[design_file*:f, quantity*:n, material*:s{cotton|polyester}](action=submit-order)",
+          "expected_mdma": "```mdma\nid: lead-time\ntype: callout\nvariant: info\ntitle: \"Lead time\"\ncontent: \"Custom orders ship within 3-4 weeks once the design is approved.\"\ndismissible: true\n```\n\n```mdma\nid: custom-form\ntype: form\nonSubmit: submit-order\nfields:\n  - name: design_file\n    type: file\n    label: \"Design file\"\n    required: true\n  - name: quantity\n    type: number\n    label: \"Quantity\"\n    required: true\n  - name: material\n    type: select\n    label: \"Material\"\n    required: true\n    options:\n      - { label: \"Cotton\", value: \"cotton\" }\n      - { label: \"Polyester\", value: \"polyester\" }\n```",
+          "domainId": "custom_order",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_product_review_pl",
+        "vars": {
+          "request": "form#review-form[rating*:s{1|3|5}, title*:t, body*:ta, verified:c](action=submit-product-review)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: review-form\ntype: form\nonSubmit: submit-product-review\nfields:\n  - name: rating\n    type: select\n    label: \"Ocena\"\n    required: true\n    options:\n      - { label: \"1 gwiazdka\", value: \"1\" }\n      - { label: \"3 gwiazdki\", value: \"3\" }\n      - { label: \"5 gwiazdek\", value: \"5\" }\n  - name: title\n    type: text\n    label: \"Tytuł\"\n    required: true\n  - name: body\n    type: textarea\n    label: \"Treść opinii\"\n    required: true\n  - name: verified\n    type: checkbox\n    label: \"Zakup potwierdzony\"\n```",
+          "domainId": "product_review",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_seller_payout_en",
+        "vars": {
+          "request": "form#payout-form[account_holder*:t, iban*^:t, schedule*:s{weekly|monthly}](action=submit-payout)",
+          "expected_mdma": "```mdma\nid: payout-form\ntype: form\nonSubmit: submit-payout\nfields:\n  - name: account_holder\n    type: text\n    label: \"Account holder\"\n    required: true\n  - name: iban\n    type: text\n    label: \"IBAN\"\n    required: true\n    sensitive: true\n  - name: schedule\n    type: select\n    label: \"Payout schedule\"\n    required: true\n    options:\n      - { label: \"Weekly\", value: \"weekly\" }\n      - { label: \"Monthly\", value: \"monthly\" }\n```",
+          "domainId": "seller_payout",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_sales_dashboard_en",
+        "vars": {
+          "request": "callout#low-stock(text=\"Low stock\")\nchart#revenue-over-time(variant=line)\ntable#top-products",
+          "expected_mdma": "```mdma\nid: low-stock\ntype: callout\nvariant: warning\ntitle: \"Low stock\"\ncontent: \"Three best-selling products are below the reorder threshold.\"\ndismissible: false\n```\n\n```mdma\nid: revenue-over-time\ntype: chart\nvariant: line\ndata: \"{{sales.revenue_daily}}\"\nxAxis: date\nyAxis: revenue\n```\n\n```mdma\nid: top-products\ntype: table\ncolumns:\n  - { key: product, header: \"Product\" }\n  - { key: units, header: \"Units\" }\n  - { key: revenue, header: \"Revenue\" }\ndata:\n  - { product: \"Wireless earbuds\", units: 412, revenue: 28800 }\n  - { product: \"Phone case\", units: 980, revenue: 11760 }\n```",
+          "domainId": "sales_dashboard",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_permit_application_pl",
+        "vars": {
+          "request": "callout#fee-note(text=\"Opłata\")\napproval-gate#office-review(text=\"Decyzja urzędu\")\n@lang: pl",
+          "expected_mdma": "```mdma\nid: fee-note\ntype: callout\nvariant: info\ntitle: \"Opłata\"\ncontent: \"Opłata skarbowa za wydanie pozwolenia jest naliczana po wstępnej weryfikacji wniosku.\"\ndismissible: false\n```\n\n```mdma\nid: office-review\ntype: approval-gate\ntitle: \"Decyzja urzędu\"\nrequiredApprovers: 1\nallowedRoles: [urzednik]\nonApprove: issue-permit\nonDeny: reject-permit\nrequireReason: false\n```",
+          "domainId": "permit_application",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_tax_filing_en",
+        "vars": {
+          "request": "callout#deadline(text=\"Filing deadline\")\nform#tax-form[tax_id*^:t, filing_status*:s{single|married-joint}](action=submit-return)\ntable#income-items",
+          "expected_mdma": "```mdma\nid: deadline\ntype: callout\nvariant: warning\ntitle: \"Filing deadline\"\ncontent: \"Returns submitted after the deadline may incur a late-filing penalty.\"\ndismissible: false\n```\n\n```mdma\nid: tax-form\ntype: form\nonSubmit: submit-return\nfields:\n  - name: tax_id\n    type: text\n    label: \"Tax identification number\"\n    required: true\n    sensitive: true\n  - name: filing_status\n    type: select\n    label: \"Filing status\"\n    required: true\n    options:\n      - { label: \"Single\", value: \"single\" }\n      - { label: \"Married joint\", value: \"married-joint\" }\n```\n\n```mdma\nid: income-items\ntype: table\ncolumns:\n  - { key: source, header: \"Source\" }\n  - { key: amount, header: \"Amount\" }\ndata:\n  - { source: \"Employment\", amount: 62000 }\n  - { source: \"Dividends\", amount: 1400 }\n```",
+          "domainId": "tax_filing",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_service_request_pl",
+        "vars": {
+          "request": "form#service-form[category*:s{droga|oswietlenie|odpady|zielen}, location*:t, photo:f](action=submit-service-request)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: service-form\ntype: form\nonSubmit: submit-service-request\nfields:\n  - name: category\n    type: select\n    label: \"Kategoria\"\n    required: true\n    options:\n      - { label: \"Droga\", value: \"droga\" }\n      - { label: \"Oświetlenie\", value: \"oswietlenie\" }\n      - { label: \"Odpady\", value: \"odpady\" }\n      - { label: \"Zieleń\", value: \"zielen\" }\n  - name: location\n    type: text\n    label: \"Lokalizacja\"\n    required: true\n  - name: photo\n    type: file\n    label: \"Zdjęcie\"\n```",
+          "domainId": "service_request_311",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_benefits_enrollment_en",
+        "vars": {
+          "request": "callout#eligibility(text=\"Eligibility\")\napproval-gate#caseworker-review(text=\"Caseworker review\")",
+          "expected_mdma": "```mdma\nid: eligibility\ntype: callout\nvariant: info\ntitle: \"Eligibility\"\ncontent: \"Benefits are means-tested. Provide accurate household income to determine eligibility.\"\ndismissible: false\n```\n\n```mdma\nid: caseworker-review\ntype: approval-gate\ntitle: \"Caseworker review\"\nallowedRoles: [caseworker]\nonApprove: approve-benefits\nonDeny: deny-benefits\nrequireReason: true\n```",
+          "domainId": "benefits_enrollment",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_foia_request_en",
+        "vars": {
+          "request": "form#foia-form[records_description*:ta, date_from:d, date_to:d](action=submit-foia)",
+          "expected_mdma": "```mdma\nid: foia-form\ntype: form\nonSubmit: submit-foia\nfields:\n  - name: records_description\n    type: textarea\n    label: \"Records requested\"\n    required: true\n  - name: date_from\n    type: date\n    label: \"From date\"\n  - name: date_to\n    type: date\n    label: \"To date\"\n```",
+          "domainId": "foia_request",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_course_enrollment_pl",
+        "vars": {
+          "request": "form#enrollment-form[course*:s{algorytmy|bazy-danych|sieci}, semester*:s{zimowy|letni}](action=submit-enrollment)\ntable#prerequisites\n@lang: pl",
+          "expected_mdma": "```mdma\nid: enrollment-form\ntype: form\nonSubmit: submit-enrollment\nfields:\n  - name: course\n    type: select\n    label: \"Kurs\"\n    required: true\n    options:\n      - { label: \"Algorytmy\", value: \"algorytmy\" }\n      - { label: \"Bazy danych\", value: \"bazy-danych\" }\n      - { label: \"Sieci\", value: \"sieci\" }\n  - name: semester\n    type: select\n    label: \"Semestr\"\n    required: true\n    options:\n      - { label: \"Zimowy\", value: \"zimowy\" }\n      - { label: \"Letni\", value: \"letni\" }\n```\n\n```mdma\nid: prerequisites\ntype: table\ncolumns:\n  - { key: course, header: \"Wymagany kurs\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { course: \"Wstęp do programowania\", status: \"Zaliczony\" }\n  - { course: \"Matematyka dyskretna\", status: \"Zaliczony\" }\n```",
+          "domainId": "course_enrollment",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_course_feedback_en",
+        "vars": {
+          "request": "form#feedback-form[overall*:s{poor|average|excellent}, anonymous:c, comments:ta](action=submit-feedback)",
+          "expected_mdma": "```mdma\nid: feedback-form\ntype: form\nonSubmit: submit-feedback\nfields:\n  - name: overall\n    type: select\n    label: \"Overall rating\"\n    required: true\n    options:\n      - { label: \"Poor\", value: \"poor\" }\n      - { label: \"Average\", value: \"average\" }\n      - { label: \"Excellent\", value: \"excellent\" }\n  - name: anonymous\n    type: checkbox\n    label: \"Submit anonymously\"\n  - name: comments\n    type: textarea\n    label: \"Comments\"\n```",
+          "domainId": "course_feedback",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_scholarship_application_en",
+        "vars": {
+          "request": "callout#scholarship-note(text=\"Need-based\")\nform#scholarship-form[transcript*:f, household_income*^:n, essay*:ta](action=submit-scholarship)",
+          "expected_mdma": "```mdma\nid: scholarship-note\ntype: callout\nvariant: info\ntitle: \"Need-based\"\ncontent: \"Awards are need-based. Financial information is reviewed confidentially by the committee.\"\ndismissible: false\n```\n\n```mdma\nid: scholarship-form\ntype: form\nonSubmit: submit-scholarship\nfields:\n  - name: transcript\n    type: file\n    label: \"Transcript\"\n    required: true\n  - name: household_income\n    type: number\n    label: \"Household income\"\n    required: true\n    sensitive: true\n  - name: essay\n    type: textarea\n    label: \"Personal statement\"\n    required: true\n```",
+          "domainId": "scholarship_application",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_student_progress_table_en",
+        "vars": {
+          "request": "callout#at-risk(text=\"At risk\")\ntable#assignments",
+          "expected_mdma": "```mdma\nid: at-risk\ntype: callout\nvariant: warning\ntitle: \"At risk\"\ncontent: \"This student is below the passing threshold in two courses and may need additional support.\"\ndismissible: false\n```\n\n```mdma\nid: assignments\ntype: table\ncolumns:\n  - { key: assignment, header: \"Assignment\" }\n  - { key: grade, header: \"Grade\" }\n  - { key: status, header: \"Status\" }\ndata:\n  - { assignment: \"Essay 1\", grade: \"B\", status: \"Graded\" }\n  - { assignment: \"Midterm\", grade: \"D\", status: \"Graded\" }\n```",
+          "domainId": "student_progress",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_student_progress_chart_en",
+        "vars": {
+          "request": "chart#grades-over-term(variant=line)",
+          "expected_mdma": "```mdma\nid: grades-over-term\ntype: chart\nvariant: line\ndata: \"{{student.grades_by_week}}\"\nxAxis: week\nyAxis: grade\n```",
+          "domainId": "student_progress",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_visa_application_pl",
+        "vars": {
+          "request": "callout#visa-fee(text=\"Opłata wizowa\")\nform#visa-form[passport_number*^:t, photo*:f, purpose*:s{turystyka|biznes|studia}](action=submit-visa)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: visa-fee\ntype: callout\nvariant: info\ntitle: \"Opłata wizowa\"\ncontent: \"Opłata wizowa jest bezzwrotna, niezależnie od decyzji konsulatu.\"\ndismissible: false\n```\n\n```mdma\nid: visa-form\ntype: form\nonSubmit: submit-visa\nfields:\n  - name: passport_number\n    type: text\n    label: \"Numer paszportu\"\n    required: true\n    sensitive: true\n  - name: photo\n    type: file\n    label: \"Zdjęcie\"\n    required: true\n  - name: purpose\n    type: select\n    label: \"Cel podróży\"\n    required: true\n    options:\n      - { label: \"Turystyka\", value: \"turystyka\" }\n      - { label: \"Biznes\", value: \"biznes\" }\n      - { label: \"Studia\", value: \"studia\" }\n```",
+          "domainId": "visa_application",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_hotel_booking_en",
+        "vars": {
+          "request": "callout#cancellation(text=\"Free cancellation\")\nform#booking-form[check_in*:d, check_out*:d, room_type*:s{standard|deluxe|suite}](action=book-room)",
+          "expected_mdma": "```mdma\nid: cancellation\ntype: callout\nvariant: info\ntitle: \"Free cancellation\"\ncontent: \"Free cancellation up to 48 hours before check-in. Later cancellations are charged one night.\"\ndismissible: true\n```\n\n```mdma\nid: booking-form\ntype: form\nonSubmit: book-room\nfields:\n  - name: check_in\n    type: date\n    label: \"Check-in\"\n    required: true\n  - name: check_out\n    type: date\n    label: \"Check-out\"\n    required: true\n  - name: room_type\n    type: select\n    label: \"Room type\"\n    required: true\n    options:\n      - { label: \"Standard\", value: \"standard\" }\n      - { label: \"Deluxe\", value: \"deluxe\" }\n      - { label: \"Suite\", value: \"suite\" }\n```",
+          "domainId": "hotel_booking",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_trip_itinerary_en",
+        "vars": {
+          "request": "tasklist#itinerary-tasks\ntable#bookings",
+          "expected_mdma": "```mdma\nid: itinerary-tasks\ntype: tasklist\nitems:\n  - id: day-1\n    text: \"Day 1 — arrival and city walk\"\n  - id: day-2\n    text: \"Day 2 — museum and old town\"\n  - id: day-3\n    text: \"Day 3 — day trip and departure\"\n```\n\n```mdma\nid: bookings\ntype: table\ncolumns:\n  - { key: item, header: \"Booking\" }\n  - { key: date, header: \"Date\" }\n  - { key: ref, header: \"Reference\" }\ndata:\n  - { item: \"Flight\", date: \"2026-07-10\", ref: \"LO123\" }\n  - { item: \"Hotel\", date: \"2026-07-10\", ref: \"BK-9981\" }\n```",
+          "domainId": "trip_itinerary",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_flight_change_pl",
+        "vars": {
+          "request": "callout#fare-difference(text=\"Dopłata\")\nform#change-form[booking_ref*:t, new_date*:d](action=submit-change)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: fare-difference\ntype: callout\nvariant: warning\ntitle: \"Dopłata\"\ncontent: \"Zmiana terminu może wiązać się z dopłatą do różnicy w cenie biletu.\"\ndismissible: false\n```\n\n```mdma\nid: change-form\ntype: form\nonSubmit: submit-change\nfields:\n  - name: booking_ref\n    type: text\n    label: \"Numer rezerwacji\"\n    required: true\n  - name: new_date\n    type: date\n    label: \"Nowa data\"\n    required: true\n```",
+          "domainId": "flight_change",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_property_inquiry_pl",
+        "vars": {
+          "request": "form#inquiry-form[listing_id*:t, viewing_date:d, phone*^:t](action=submit-inquiry)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: inquiry-form\ntype: form\nonSubmit: submit-inquiry\nfields:\n  - name: listing_id\n    type: text\n    label: \"Numer oferty\"\n    required: true\n  - name: viewing_date\n    type: date\n    label: \"Preferowana data oglądania\"\n  - name: phone\n    type: text\n    label: \"Telefon kontaktowy\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "property_inquiry",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_tenant_application_en",
+        "vars": {
+          "request": "callout#screening-note(text=\"Screening\")\nform#tenant-form[full_name*:t, monthly_income*^:n, income_proof*^:f](action=submit-tenant)",
+          "expected_mdma": "```mdma\nid: screening-note\ntype: callout\nvariant: info\ntitle: \"Screening\"\ncontent: \"Applications include a credit and reference check. Your information is handled confidentially.\"\ndismissible: false\n```\n\n```mdma\nid: tenant-form\ntype: form\nonSubmit: submit-tenant\nfields:\n  - name: full_name\n    type: text\n    label: \"Full name\"\n    required: true\n  - name: monthly_income\n    type: number\n    label: \"Monthly income\"\n    required: true\n    sensitive: true\n  - name: income_proof\n    type: file\n    label: \"Proof of income\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "tenant_application",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_maintenance_request_en",
+        "vars": {
+          "request": "form#maintenance-form[category*:s{plumbing|electrical|heating}, urgency*:s{low|medium|high}, photo:f](action=submit-maintenance)",
+          "expected_mdma": "```mdma\nid: maintenance-form\ntype: form\nonSubmit: submit-maintenance\nfields:\n  - name: category\n    type: select\n    label: \"Issue category\"\n    required: true\n    options:\n      - { label: \"Plumbing\", value: \"plumbing\" }\n      - { label: \"Electrical\", value: \"electrical\" }\n      - { label: \"Heating\", value: \"heating\" }\n  - name: urgency\n    type: select\n    label: \"Urgency\"\n    required: true\n    options:\n      - { label: \"Low\", value: \"low\" }\n      - { label: \"Medium\", value: \"medium\" }\n      - { label: \"High\", value: \"high\" }\n  - name: photo\n    type: file\n    label: \"Photo of the issue\"\n```",
+          "domainId": "maintenance_request",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_contract_intake_en",
+        "vars": {
+          "request": "form#contract-form[contract_type*:s{msa|sow|dpa}, document*:f, counterparty*:t](action=submit-contract)",
+          "expected_mdma": "```mdma\nid: contract-form\ntype: form\nonSubmit: submit-contract\nfields:\n  - name: contract_type\n    type: select\n    label: \"Contract type\"\n    required: true\n    options:\n      - { label: \"MSA\", value: \"msa\" }\n      - { label: \"SOW\", value: \"sow\" }\n      - { label: \"DPA\", value: \"dpa\" }\n  - name: document\n    type: file\n    label: \"Contract document\"\n    required: true\n  - name: counterparty\n    type: text\n    label: \"Counterparty\"\n    required: true\n```",
+          "domainId": "contract_intake",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_nda_request_en",
+        "vars": {
+          "request": "callout#nda-disclaimer(text=\"Template only\")\nform#nda-form[party_a*:t, party_b*:t, mutual*:s{mutual|one-way}, term_months*:n](action=generate-nda)",
+          "expected_mdma": "```mdma\nid: nda-disclaimer\ntype: callout\nvariant: warning\ntitle: \"Template only\"\ncontent: \"This generates a template NDA and is not a substitute for legal advice.\"\ndismissible: false\n```\n\n```mdma\nid: nda-form\ntype: form\nonSubmit: generate-nda\nfields:\n  - name: party_a\n    type: text\n    label: \"Disclosing party\"\n    required: true\n  - name: party_b\n    type: text\n    label: \"Receiving party\"\n    required: true\n  - name: mutual\n    type: select\n    label: \"Type\"\n    required: true\n    options:\n      - { label: \"Mutual\", value: \"mutual\" }\n      - { label: \"One-way\", value: \"one-way\" }\n  - name: term_months\n    type: number\n    label: \"Term (months)\"\n    required: true\n```",
+          "domainId": "nda_request",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_case_intake_en",
+        "vars": {
+          "request": "callout#privilege(text=\"Privileged\")\napproval-gate#conflict-check(text=\"Conflict check\")",
+          "expected_mdma": "```mdma\nid: privilege\ntype: callout\nvariant: info\ntitle: \"Privileged\"\ncontent: \"Information submitted here may be protected by attorney-client privilege.\"\ndismissible: false\n```\n\n```mdma\nid: conflict-check\ntype: approval-gate\ntitle: \"Conflict check\"\nallowedRoles: [partner]\nonApprove: open-matter\nonDeny: decline-matter\nrequireReason: false\n```",
+          "domainId": "case_intake",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_adv_kyc_conflicting_en",
+        "vars": {
+          "request": "callout#conflict-callout(text=\"Conflicting requirements\")\nform#min-kyc-form[legal_name*^:t, national_id*^:t](action=submit-min-kyc)",
+          "expected_mdma": "```mdma\nid: conflict-callout\ntype: callout\nvariant: error\ntitle: \"Conflicting requirements\"\ncontent: \"A verified account cannot be anonymous. Identity details below are required for verification and are never shown publicly.\"\ndismissible: false\n```\n\n```mdma\nid: min-kyc-form\ntype: form\nonSubmit: submit-min-kyc\nfields:\n  - name: legal_name\n    type: text\n    label: \"Legal name\"\n    required: true\n    sensitive: true\n  - name: national_id\n    type: text\n    label: \"National ID\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "kyc_basic",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_adv_return_ambiguous_en",
+        "vars": {
+          "request": "form#simple-return-form[order_id*:t, reason*:ta](action=start-return)",
+          "expected_mdma": "```mdma\nid: simple-return-form\ntype: form\nonSubmit: start-return\nfields:\n  - name: order_id\n    type: text\n    label: \"Order number\"\n    required: true\n  - name: reason\n    type: textarea\n    label: \"What would you like to return and why?\"\n    required: true\n```",
+          "domainId": "ecommerce_return",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_adv_medical_mixed_lang_pl",
+        "vars": {
+          "request": "form#mixed-intake-form[height_cm*:n, weight_kg*:n, last_rtg:d](action=submit-mixed-intake)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: mixed-intake-form\ntype: form\nonSubmit: submit-mixed-intake\nfields:\n  - name: height_cm\n    type: number\n    label: \"Wzrost (cm)\"\n    required: true\n  - name: weight_kg\n    type: number\n    label: \"Waga (kg)\"\n    required: true\n  - name: last_rtg\n    type: date\n    label: \"Data ostatniego badania RTG\"\n```",
+          "domainId": "medical_intake_clinic",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_adv_loan_over_constrained_en",
+        "vars": {
+          "request": "form#precheck-form[ssn*^:t, date_of_birth*^:d, annual_income*^:n, bank_account*^:t](action=submit-precheck)",
+          "expected_mdma": "```mdma\nid: precheck-form\ntype: form\nonSubmit: submit-precheck\nfields:\n  - name: ssn\n    type: text\n    label: \"Social security number\"\n    required: true\n    sensitive: true\n  - name: date_of_birth\n    type: date\n    label: \"Date of birth\"\n    required: true\n    sensitive: true\n  - name: annual_income\n    type: number\n    label: \"Annual income\"\n    required: true\n    sensitive: true\n  - name: bank_account\n    type: text\n    label: \"Bank account number\"\n    required: true\n    sensitive: true\n```",
+          "domainId": "loan_application",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_adv_approval_only_en",
+        "vars": {
+          "request": "callout#awaiting-signoff(text=\"Awaiting sign-off\")\napproval-gate#partner-signoff(text=\"Partner sign-off\")",
+          "expected_mdma": "```mdma\nid: awaiting-signoff\ntype: callout\nvariant: info\ntitle: \"Awaiting sign-off\"\ncontent: \"The contract has been reviewed by legal and is ready for partner sign-off.\"\ndismissible: false\n```\n\n```mdma\nid: partner-signoff\ntype: approval-gate\ntitle: \"Partner sign-off\"\nrequiredApprovers: 1\nallowedRoles: [partner]\nonApprove: approve-contract\nonDeny: reject-contract\nrequireReason: false\n```",
+          "domainId": "contract_intake",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_adv_chart_only_pl",
+        "vars": {
+          "request": "chart#cashflow(variant=area)\n@lang: pl",
+          "expected_mdma": "```mdma\nid: cashflow\ntype: chart\nvariant: area\ndata: \"{{finance.cashflow}}\"\nxAxis: month\nyAxis: net\n```",
+          "domainId": "budget_dashboard",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_adv_table_only_pl",
+        "vars": {
+          "request": "table#cashflow-table\n@lang: pl",
+          "expected_mdma": "```mdma\nid: cashflow-table\ntype: table\ncolumns:\n  - { key: month, header: \"Miesiąc\" }\n  - { key: inflow, header: \"Wpływy\" }\n  - { key: outflow, header: \"Wydatki\" }\ndata:\n  - { month: \"Styczeń\", inflow: 18000, outflow: 14200 }\n  - { month: \"Luty\", inflow: 17500, outflow: 15100 }\n```",
+          "domainId": "budget_dashboard",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_foia_received_callout_en",
+        "vars": {
+          "request": "callout#request-received(text=\"Request received\")",
+          "expected_mdma": "```mdma\nid: request-received\ntype: callout\nvariant: success\ntitle: \"Request received\"\ncontent: \"Your records request was received. We will respond within 20 business days.\"\ndismissible: true\n```",
+          "domainId": "foia_request",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_service_outage_callout_pl",
+        "vars": {
+          "request": "callout#outage-notice(text=\"Przerwa w działaniu\")\n@lang: pl",
+          "expected_mdma": "```mdma\nid: outage-notice\ntype: callout\nvariant: warning\ntitle: \"Przerwa w działaniu\"\ncontent: \"System zgłoszeń jest tymczasowo niedostępny z powodu prac serwisowych. Spróbuj ponownie później.\"\ndismissible: false\n```",
+          "domainId": "service_request_311",
+          "language": "pl"
+        }
+      },
+      {
+        "description": "hold_dispute_credit_callout_en",
+        "vars": {
+          "request": "callout#credit-notice(text=\"Provisional credit applied\")",
+          "expected_mdma": "```mdma\nid: credit-notice\ntype: callout\nvariant: info\ntitle: \"Provisional credit applied\"\ncontent: \"A provisional credit has been applied to your account while we investigate the dispute.\"\ndismissible: true\n```",
+          "domainId": "card_dispute",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_seller_payout_activate_en",
+        "vars": {
+          "request": "callout#payout-activating(text=\"Activating payouts\")\nwebhook#activate-payout",
+          "expected_mdma": "```mdma\nid: payout-activating\ntype: callout\nvariant: info\ntitle: \"Activating payouts\"\ncontent: \"Your payout details were saved — activating payouts now.\"\ndismissible: false\n```\n\n```mdma\nid: activate-payout\ntype: webhook\nurl: \"https://payouts.example.com/api/activate\"\nmethod: POST\ntrigger: submit-payout\n```",
+          "domainId": "seller_payout",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_claim_notify_webhook_en",
+        "vars": {
+          "request": "callout#claimant-notifying(text=\"Notifying claimant\")\nwebhook#notify-claimant",
+          "expected_mdma": "```mdma\nid: claimant-notifying\ntype: callout\nvariant: info\ntitle: \"Notifying claimant\"\ncontent: \"Sending the status update to the claimant notification service.\"\ndismissible: false\n```\n\n```mdma\nid: notify-claimant\ntype: webhook\nurl: \"https://notify.example.com/api/claims/status\"\nmethod: POST\ntrigger: update-status\nbody:\n  event: \"status-change\"\n```",
+          "domainId": "claim_webhook_notify",
+          "language": "en"
+        }
+      },
+      {
+        "description": "hold_flight_rebook_webhook_pl",
+        "vars": {
+          "request": "callout#rebook-processing(text=\"Przetwarzanie zmiany\")\nwebhook#ticketing\n@lang: pl",
+          "expected_mdma": "```mdma\nid: rebook-processing\ntype: callout\nvariant: info\ntitle: \"Przetwarzanie zmiany\"\ncontent: \"Wysyłanie zmiany rezerwacji do systemu biletowego.\"\ndismissible: false\n```\n\n```mdma\nid: ticketing\ntype: webhook\nurl: \"https://ticketing.example.com/api/rebook\"\nmethod: POST\ntrigger: submit-change\nbody:\n  action: \"rebook\"\n```",
+          "domainId": "flight_change",
+          "language": "pl"
+        }
+      }
+    ],
+    "scenarios": [],
+    "env": {},
+    "defaultTest": {
+      "assert": [
+        {
+          "type": "javascript",
+          "value": "file://assertions/validate-mdma.mjs",
+          "config": {
+            "exclude": [
+              "flow-ordering"
+            ]
+          }
+        }
+      ],
+      "vars": {},
+      "options": {},
+      "metadata": {}
+    },
+    "outputPath": [
+      "own-model/results.json"
+    ],
+    "extensions": [],
+    "metadata": {},
+    "evaluateOptions": {}
+  },
+  "shareableUrl": null,
+  "metadata": {
+    "promptfooVersion": "0.121.9",
+    "nodeVersion": "v22.22.0",
+    "platform": "darwin",
+    "arch": "arm64",
+    "exportedAt": "2026-06-30T12:35:14.118Z",
+    "evaluationCreatedAt": "2026-06-30T12:32:59.728Z"
+  }
+}
\ No newline at end of file
diff --git a/evals/own-model/run-conversation.mjs b/evals/own-model/run-conversation.mjs
new file mode 100644
index 0000000..85be2b7
--- /dev/null
+++ b/evals/own-model/run-conversation.mjs
@@ -0,0 +1,107 @@
+/**
+ * Genuine multi-turn DSL conversation runner for our model.
+ *
+ * promptfoo's `_conversation` threading (metadata.conversationId) does not
+ * populate in this setup, so this runs the conversations directly: it threads
+ * the model's OWN outputs turn-by-turn (system + accumulated user-DSL/assistant-
+ * MDMA + current DSL), and checks each turn for:
+ *   - valid MDMA (validator)
+ *   - only the current turn's component type (only-components)
+ *   - NO cross-contamination: the output must not re-emit an earlier turn's
+ *     component id.
+ *
+ * Conversations + per-turn expected type are read from tests-conversation.yaml.
+ * Serial by construction (turn N needs turn N-1's output). Run from evals/:
+ *   node own-model/run-conversation.mjs
+ */
+import 'dotenv/config';
+import { readFileSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import YAML from 'yaml';
+import { validate } from '@mobile-reality/mdma-validator';
+import { getAuthorPromptVariant } from '@mobile-reality/mdma-prompt-pack';
+
+const AUTHORING_SYSTEM_PROMPT = getAuthorPromptVariant('mobile-reality/mdma-il').prompt;
+
+const BASE = process.env.OWN_MODEL_BASE_URL || 'https://REDACTED.modal.run/v1';
+const MODEL = (process.env.OWN_MODEL_PROVIDER || 'openai:chat:mdma-26b').split(':').pop(); // last ':' segment, e.g. 'mdma-26b'
+
+const here = (p) => fileURLToPath(new URL(p, import.meta.url)); // resolve `p` relative to this module's URL
+const turns = YAML.parse(readFileSync(here('tests-conversation.yaml'), 'utf8'));
+
+// Group turns by metadata.conversationId (falling back to 'default'), preserving file order.
+const convs = new Map();
+for (const t of turns) {
+  const id = t.metadata?.conversationId ?? 'default';
+  if (!convs.has(id)) convs.set(id, []);
+  const allowed = t.assert?.find((a) => String(a.value).includes('only-components'))?.config?.allowed ?? []; // taken from the turn's only-components assertion
+  convs.get(id).push({
+    desc: t.description,
+    dsl: String(t.vars.message).trim(),
+    allowedType: allowed[0], // only the first allowed type is checked per turn
+    id: (String(t.vars.message).match(/#([a-z0-9-]+)/i) || [])[1], // first `#id` in the DSL message; undefined when there is none
+  });
+}
+
+async function gen(messages) {
+  const res = await fetch(`${BASE}/chat/completions`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json', Authorization: 'Bearer unused' },
+    body: JSON.stringify({
+      model: MODEL,
+      temperature: 1,
+      max_tokens: 2048,
+      // Cut the Gemma 4 reasoning repetition loop (see repetition-loops.md):
+      // min_p is the primary tail-cutter, repetition_penalty starts low.
+      min_p: 0.02,
+      repetition_penalty: 1.1,
+      chat_template_kwargs: { enable_thinking: false },
+      messages,
+    }),
+  });
+  const j = await res.json();
+  if (!j.choices) throw new Error(`HTTP ${res.status}: ${JSON.stringify(j).slice(0, 120)}`);
+  return j.choices[0].message.content;
+}
+
+function blockTypes(out) { // returns the `type:` value of each ```mdma fenced block found in `out`, in order of appearance
+  return [...out.matchAll(/```mdma\n([\s\S]*?)```/g)]
+    .map((m) => (m[1].match(/^\s*type:\s*([a-z-]+)/m) || [])[1]) // first line-leading `type:` key inside the block body
+    .filter(Boolean); // drop blocks where no type could be read
+}
+
+let pass = 0;
+let total = 0;
+console.log(`endpoint ${BASE} · model ${MODEL}\n`);
+for (const [cid, ts] of convs) {
+  console.log(`=== ${cid} (${ts.length} turns) ===`);
+  const messages = [{ role: 'system', content: AUTHORING_SYSTEM_PROMPT }];
+  const priorIds = [];
+  for (const turn of ts) {
+    total++;
+    messages.push({ role: 'user', content: turn.dsl });
+    let out;
+    try {
+      out = await gen(messages);
+    } catch (e) {
+      console.log(`  FAIL ${turn.desc} — ${e.message}`);
+      continue;
+    }
+    messages.push({ role: 'assistant', content: out });
+
+    const v = validate(out, { exclude: ['thinking-block', 'flow-ordering'], autoFix: false });
+    const types = blockTypes(out).filter((t) => t !== 'thinking');
+    const onlyExpected = types.length > 0 && types.every((t) => t === turn.allowedType);
+    const contaminated = priorIds.filter((pid) => new RegExp(`id:\\s*"?${pid}\\b`).test(out));
+    const ok = v.ok && onlyExpected && contaminated.length === 0;
+    if (ok) pass++;
+    const notes = [];
+    if (!v.ok)
+      notes.push(`invalid: ${v.issues.filter((i) => i.severity === 'error').map((i) => i.ruleId).join(',')}`);
+    if (!onlyExpected) notes.push(`emitted: ${types.join(',') || 'none'}; expected ${turn.allowedType}`);
+    if (contaminated.length) notes.push(`RE-EMITTED prior: ${contaminated.join(',')}`);
+    console.log(`  ${ok ? 'PASS' : 'FAIL'} ${turn.desc}${notes.length ? ` [${notes.join('] [')}]` : ''}`);
+    if (turn.id) priorIds.push(turn.id);
+  }
+}
+console.log(`\nTOTAL: ${pass}/${total} turns valid + scoped (no cross-contamination)`);
diff --git a/evals/own-model/tests-author.yaml b/evals/own-model/tests-author.yaml
new file mode 100644
index 0000000..9b9d46d
--- /dev/null
+++ b/evals/own-model/tests-author.yaml
@@ -0,0 +1,272 @@
+# Author suite for our model — DSL port of ../tests.yaml (28 scenarios).
+# Each `request` is the scenario expressed as a DSL intent (the input our model
+# was trained on), driven by the shared authoring system prompt (prompt-author.mjs).
+# The structural assertions are carried over unchanged. Details the DSL can't
+# carry (table sortable/filterable, button confirm, approval-gate
+# requiredApprovers/allowedRoles, bindings) are filled in by the model.
+
+# 1. Simple contact form
+- description: Contact form (DSL)
+  vars:
+    request: |
+      form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 3 } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+
+# 2. PII sensitivity
+- description: Employee PII form, sensitive fields (DSL)
+  vars:
+    request: |
+      form#employee-pii-form[full-name*:t, email*^:e, phone^:t, ssn*^:t](action=employee-pii-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 4 } }
+    - { type: javascript, value: file://assertions/pii-sensitive.mjs }
+
+# 3. Thinking block before content (warning callout)
+- description: Maintenance callout (DSL)
+  vars:
+    request: |
+      callout#maintenance-warning(variant=warning)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+
+# 4. No markdown wrapping (button)
+- description: Submit report button (DSL)
+  vars:
+    request: |
+      button#submit-report-btn(text="Submit Report", action=submit-report, variant=primary)
+  assert:
+    - { type: not-contains, value: "```markdown" }
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [button] } }
+
+# 5. Select field has options
+- description: Country selector form (DSL)
+  vars:
+    request: |
+      form#country-selector-form[country*:s{us|uk|ca|au|de}](action=country-form-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+
+# 6. Table
+- description: Languages table (DSL)
+  vars:
+    request: |
+      table#languages-table
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [table] } }
+    - { type: javascript, value: file://assertions/table-features.mjs }
+
+# 7. Bar chart
+- description: Quarterly sales bar chart (DSL)
+  vars:
+    request: |
+      chart#quarterly-sales(variant=bar)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [chart] } }
+    - { type: javascript, value: file://assertions/bar-chart.mjs }
+
+# 8. Approval gate (2 approvers)
+- description: Document approval gate (DSL)
+  vars:
+    request: |
+      approval-gate#document-approval
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [approval-gate] } }
+
+# 9. Incident triage form
+- description: Incident triage severity form (DSL)
+  vars:
+    request: |
+      form#severity-form[title*:t, reporter-email*^:e, severity*:s{critical|high|medium|low}](action=triage-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 3 } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+
+# 10. Unique kebab IDs across components (form + callout)
+- description: Settings form + saved callout (DSL)
+  vars:
+    request: |
+      form#settings-form[display-name*:t, email*^:e](action=settings-saved)
+      callout#settings-saved(variant=success)
+  assert:
+    - { type: javascript, value: file://assertions/unique-kebab-ids.mjs }
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/component-count.mjs, config: { min: 2 } }
+
+# 11. Callout — warning
+- description: Password-expiry warning callout (DSL)
+  vars:
+    request: |
+      callout#password-expiry(variant=warning)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: warning } }
+
+# 12. Callout — error
+- description: Payment-error callout (DSL)
+  vars:
+    request: |
+      callout#payment-error(variant=error)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: error } }
+
+# 13. Callout — success
+- description: Account-verified callout (DSL)
+  vars:
+    request: |
+      callout#account-verified(variant=success)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: success } }
+
+# 14. Pie chart
+- description: Browser-share pie chart (DSL)
+  vars:
+    request: |
+      chart#browser-share(variant=pie)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [chart] } }
+    - { type: javascript, value: file://assertions/pie-chart.mjs }
+
+# 15. Button with confirm dialog
+- description: Delete-account danger button (DSL)
+  vars:
+    request: |
+      button#delete-account-btn(text="Delete Account", action=delete-account, variant=danger)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [button] } }
+    - { type: contains, value: "variant: danger" }
+
+# 16. Form + confirmation callout
+- description: Ticket form + submitted callout (DSL)
+  vars:
+    request: |
+      form#ticket-form[subject*:t, description*:ta](action=ticket-submitted)
+      callout#ticket-submitted(variant=success)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/component-count.mjs, config: { min: 2 } }
+
+# 17. Sortable + filterable table
+- description: Employee directory table (DSL)
+  vars:
+    request: |
+      table#employee-directory
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [table] } }
+    - { type: javascript, value: file://assertions/table-features.mjs }
+
+# 18. Financial PII
+- description: Payment form, financial PII (DSL)
+  vars:
+    request: |
+      form#payment-form[cardholder-name*:t, card-number*^:t, expiry*^:t, cvv*^:t, billing-address*^:ta](action=payment-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 5 } }
+    - { type: javascript, value: file://assertions/pii-sensitive.mjs }
+
+# 19. Required fields on registration form
+- description: User registration form (DSL)
+  vars:
+    request: |
+      form#user-registration[username*:t, email*^:e, password*^:t, bio:ta](action=registration-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 4 } }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 20. Tasklist + button
+- description: Onboarding tasklist + button (DSL)
+  vars:
+    request: |
+      tasklist#onboarding-checklist
+      button#get-started-btn(text="Get Started", action=onboarding-done, variant=primary)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [tasklist, button] } }
+
+# 21. Form + conditional callout (binding)
+- description: Settings form + conditional callout (DSL)
+  vars:
+    request: |
+      form#settings-form[notifications-enabled:c](action=notification-info)
+      callout#notification-info(variant=info)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+
+# 22. Large HR personal-info form
+- description: HR personal info form (DSL)
+  vars:
+    request: |
+      form#personal-info-form[full-name*:t, email*^:e, phone^:t, dob^:d, ssn*^:t](action=info-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/unique-kebab-ids.mjs }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+
+# 23. Approval gate with roles
+- description: Budget approval gate (DSL)
+  vars:
+    request: |
+      approval-gate#budget-approval
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [approval-gate] } }
+
+# 24. Medical PII
+- description: Patient intake form, medical PII (DSL)
+  vars:
+    request: |
+      form#patient-intake[full-name*:t, dob*^:d, email^:e, phone^:t, medical-record*^:t, medications^:ta, allergies^:ta](action=patient-intake-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 7 } }
+    - { type: javascript, value: file://assertions/pii-sensitive.mjs }
+
+# 25. Multi-chart dashboard
+- description: Sales dashboard — 2 charts + table (DSL)
+  vars:
+    request: |
+      chart#monthly-revenue(variant=bar)
+      chart#category-breakdown(variant=pie)
+      table#region-summary
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [chart, table] } }
+    - { type: javascript, value: file://assertions/bar-chart.mjs }
+    - { type: javascript, value: file://assertions/pie-chart.mjs }
+    - { type: contains, value: "type: table" }
+
+# 26. File upload field
+- description: Resume form with file field (DSL)
+  vars:
+    request: |
+      form#resume-form[full-name*:t, resume*:f](action=resume-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 2 } }
+    - { type: javascript, value: file://assertions/file-field.mjs }
+
+# 27. Sensitive file upload (passport)
+- description: KYC form, sensitive file (DSL)
+  vars:
+    request: |
+      form#kyc-form[full-name*^:t, passport-scan*^:f](action=kyc-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/file-field.mjs, config: { sensitive: true } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+
+# 28. Preserves explicit component id
+- description: Conference registration, preserve id (DSL)
+  vars:
+    request: |
+      form#devcon-2026-registration[full-name*:t, email*^:e](action=devcon-registration-submitted)
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: contains, value: "id: devcon-2026-registration" }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
diff --git a/evals/own-model/tests-conversation.yaml b/evals/own-model/tests-conversation.yaml
new file mode 100644
index 0000000..de1c71b
--- /dev/null
+++ b/evals/own-model/tests-conversation.yaml
@@ -0,0 +1,151 @@
+# Multi-turn DSL conversations for our model.
+# Each turn's `message` is a DSL intent. Prior turns (DSL in / MDMA out) are
+# replayed as context (threaded by metadata.conversationId). Per turn we assert
+# only-components [<the current turn's type>] — which also catches
+# cross-contamination: if the model re-emits an earlier turn's component, that
+# component isn't in the allowed set and the turn fails.
+
+# ===== Conversation 1 — onboarding, built step by step =====
+- description: "Conv1/T1: signup form (DSL)"
+  vars:
+    message: |
+      form#signup-form[full-name*:t, email*^:e](action=create-account)
+  metadata: { conversationId: own-conv-1 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/exact-field-count.mjs, config: { expected: 2 } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+
+- description: "Conv1/T2: welcome callout — no form re-emit (DSL)"
+  vars:
+    message: |
+      callout#welcome-note(variant=success)
+  metadata: { conversationId: own-conv-1 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: success } }
+
+- description: "Conv1/T3: onboarding tasklist — no prior re-emit (DSL)"
+  vars:
+    message: |
+      tasklist#onboarding-steps
+  metadata: { conversationId: own-conv-1 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [tasklist] } }
+
+# ===== Conversation 2 — support ticket then confirmation =====
+- description: "Conv2/T1: ticket form with priority select (DSL)"
+  vars:
+    message: |
+      form#ticket-form[subject*:t, priority*:s{low|medium|high}, description*:ta](action=submit-ticket)
+  metadata: { conversationId: own-conv-2 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+
+- description: "Conv2/T2: confirmation callout — no form re-emit (DSL)"
+  vars:
+    message: |
+      callout#ticket-received(variant=info)
+  metadata: { conversationId: own-conv-2 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: info } }
+
+# ===== Conversation 3 — dashboard built component by component =====
+- description: "Conv3/T1: revenue table (DSL)"
+  vars:
+    message: |
+      table#revenue-table
+  metadata: { conversationId: own-conv-3 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [table] } }
+    - { type: javascript, value: file://assertions/table-features.mjs }
+
+- description: "Conv3/T2: revenue bar chart — no table re-emit (DSL)"
+  vars:
+    message: |
+      chart#revenue-chart(variant=bar)
+  metadata: { conversationId: own-conv-3 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [chart] } }
+    - { type: javascript, value: file://assertions/bar-chart.mjs }
+
+- description: "Conv3/T3: summary callout — no prior re-emit (DSL)"
+  vars:
+    message: |
+      callout#dashboard-note(variant=info)
+  metadata: { conversationId: own-conv-3 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [callout] } }
+
+# ===== Conversation 4 — expense submission then approval =====
+- description: "Conv4/T1: expense form with select + file (DSL)"
+  vars:
+    message: |
+      form#expense-form[amount*:n, category*:s{travel|meals|equipment}, receipt*:f](action=submit-expense)
+  metadata: { conversationId: own-conv-4 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/file-field.mjs }
+
+- description: "Conv4/T2: approval gate — no form re-emit (DSL)"
+  vars:
+    message: |
+      approval-gate#expense-approval
+  metadata: { conversationId: own-conv-4 }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [approval-gate] } }
+
+# ===== Conversation-flow ports (multi-INTERACTIVE-step flows) =====
+# DSL port of the flagship conversation-flow judge eval. Each step is ONE
+# interactive component, advanced one per turn. The harness enforces the flow
+# invariants the judge eval checks: one expected component per turn (no
+# cramming / wrong component) and no re-emitting a prior step's id (no
+# regeneration). Correct order is implied by the per-turn expected type.
+
+# ----- Conv5 — registration → manager approval (2 interactive steps) -----
+- description: "Conv5/T1: registration form (DSL)"
+  vars:
+    message: |
+      form#registration-form[full-name*:t, email*^:e](action=registration-submitted)
+  metadata: { conversationId: own-conv-5-flow }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+
+- description: "Conv5/T2: approval gate advances flow — no form re-emit (DSL)"
+  vars:
+    message: |
+      approval-gate#manager-approval
+  metadata: { conversationId: own-conv-5-flow }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [approval-gate] } }
+
+# ----- Conv6 — incident flow: form → approval-gate → webhook (3 steps) -----
+- description: "Conv6/T1: incident intake form (DSL)"
+  vars:
+    message: |
+      form#incident-form[title*:t, severity*:s{low|high}](action=incident-submitted)
+  metadata: { conversationId: own-conv-6-flow }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+
+- description: "Conv6/T2: approval gate advances flow — no form re-emit (DSL)"
+  vars:
+    message: |
+      approval-gate#incident-review
+  metadata: { conversationId: own-conv-6-flow }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [approval-gate] } }
+
+- description: "Conv6/T3: webhook advances flow — no prior re-emit (DSL)"
+  vars:
+    message: |
+      webhook#notify-oncall(action=incident-approved)
+  metadata: { conversationId: own-conv-6-flow }
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [webhook] } }
+    - { type: javascript, value: file://assertions/has-webhook.mjs }
diff --git a/evals/own-model/tests-custom.yaml b/evals/own-model/tests-custom.yaml
new file mode 100644
index 0000000..bd9be72
--- /dev/null
+++ b/evals/own-model/tests-custom.yaml
@@ -0,0 +1,325 @@
+# Custom system-prompt scenarios for our model — customPrompt carries DSL.
+#
+# Same scenarios/assertions as ../tests-custom-prompt.yaml, but each customPrompt
+# prescribes the intent in DSL (the INPUT our model was trained on), not an MDMA
+# blueprint (the output). The NL request stays as the user message; the model
+# combines DSL-grammar rules (system) + the scenario DSL (custom) + the request.
+
+# 1. Bug report form
+- description: "Bug report form (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a bug tracking assistant. When a user reports a bug, generate the
+      MDMA document for exactly this DSL intent and nothing else:
+
+      form#bug-report[title*:t, severity:s{critical|high|medium|low}, steps*:ta, expected:ta, actual:ta](action=bug-submitted)
+    request: I found a bug where the login page crashes after entering my password.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 5
+    - type: javascript
+      value: file://assertions/select-has-options.mjs
+    - type: javascript
+      value: file://assertions/has-required-fields.mjs
+      config:
+        min: 2
+    - type: contains
+      value: "type: textarea"
+
+# 2. Employee onboarding (turn-1 form only)
+- description: "Onboarding form (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are an HR onboarding assistant. When a new hire needs to be set up,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      form#new-hire-info[full-name*:t, email*^:e, department:s{engineering|design|marketing|sales|operations}, start-date*:d](action=onboarding-checklist)
+    request: We have a new hire starting in the Design department next Monday.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 4
+    - type: javascript
+      value: file://assertions/select-has-options.mjs
+    - type: javascript
+      value: file://assertions/has-sensitive.mjs
+    - type: javascript
+      value: file://assertions/has-required-fields.mjs
+      config:
+        min: 3
+
+# 3. Customer feedback — form + pie chart (multi-component)
+- description: "Feedback form + pie chart (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a customer success assistant. When asked about feedback, generate
+      the MDMA document for exactly this DSL intent and nothing else:
+
+      form#feedback-form[customer-name:t, email^:e, rating:s{1|2|3|4|5}, feedback:ta](action=feedback-submitted)
+      chart#satisfaction-chart(variant=pie)
+    request: I need to collect customer feedback for this quarter.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form, chart]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 4
+    - type: javascript
+      value: file://assertions/pie-chart.mjs
+    - type: javascript
+      value: file://assertions/select-has-options.mjs
+    - type: javascript
+      value: file://assertions/has-sensitive.mjs
+
+# 4. Expense report (turn-1 form only)
+- description: "Expense form (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a finance assistant. When an expense needs logging, generate the
+      MDMA document for exactly this DSL intent and nothing else:
+
+      form#expense-form[amount*:n, category:s{travel|equipment|software|meals|other}, date*:d, receipt-desc:ta, receipt-number:t](action=approve-expense)
+    request: I need to expense a $250 flight for the NYC conference.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 5
+    - type: javascript
+      value: file://assertions/select-has-options.mjs
+
+# 5. IT ticket (turn-1 form only)
+- description: "IT ticket form (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are an IT helpdesk assistant. When a user reports an issue, generate
+      the MDMA document for exactly this DSL intent and nothing else:
+
+      form#it-ticket[requester-email*^:e, issue-type:s{hardware|software|network|access|other}, priority:s{low|medium|high|urgent}, description*:ta](action=submit-ticket)
+    request: My monitor stopped working this morning and I can't do any visual design work.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 4
+    - type: javascript
+      value: file://assertions/has-sensitive.mjs
+    - type: javascript
+      value: file://assertions/has-required-fields.mjs
+      config:
+        min: 2
+
+# 6. Sales dashboard — table + bar chart (multi-component)
+- description: "Sales table + bar chart (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a sales analytics assistant. When asked for a sales report,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      table#sales-table
+      chart#sales-chart(variant=bar)
+    request: Show me the Q4 sales performance breakdown.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [table, chart]
+    - type: javascript
+      value: file://assertions/table-features.mjs
+    - type: javascript
+      value: file://assertions/bar-chart.mjs
+    - type: not-contains
+      value: "type: form"
+    - type: not-contains
+      value: "type: button"
+
+# 7. Patient intake — PII-heavy form
+- description: "Patient intake form, PII marking (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a medical intake assistant. When registering a patient, generate
+      the MDMA document for exactly this DSL intent and nothing else:
+
+      form#patient-intake[full-name*^:t, dob*^:d, email^:e, phone^:t, insurance-id*^:t, chief-complaint*:ta](action=patient-registered)
+    request: New patient walk-in needs to be registered.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 6
+    - type: javascript
+      value: file://assertions/pii-sensitive.mjs
+    - type: javascript
+      value: file://assertions/has-required-fields.mjs
+      config:
+        min: 4
+
+# 8. Maintenance notice — single callout
+- description: "Maintenance callout (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a system status communicator. On a maintenance event, generate
+      the MDMA document for exactly this DSL intent and nothing else (fill the
+      callout content from the user's message):
+
+      callout#maintenance-notice(variant=warning)
+    request: We need to take the payment system offline on Saturday March 8th from 2 AM to 6 AM EST for database migration.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [callout]
+    - type: javascript
+      value: file://assertions/callout-variant.mjs
+      config:
+        variant: warning
+    - type: not-contains
+      value: "type: form"
+    - type: not-contains
+      value: "type: button"
+
+# 9. Contract review (turn-1 form only)
+- description: "Contract summary form (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a legal operations assistant. When a contract needs review,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      form#contract-summary[contract-title*:t, counterparty*:t, contract-value*:n, effective-date*:d, contract-type:s{nda|msa|sow|amendment|renewal}](action=review-checklist)
+    request: We need to review the new SoW from Acme Corp worth $500k.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 5
+    - type: javascript
+      value: file://assertions/select-has-options.mjs
+    - type: javascript
+      value: file://assertions/has-required-fields.mjs
+      config:
+        min: 4
+
+# 10b. Recruiting — job application with file uploads
+- description: "Job application with file fields (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a recruiting assistant. When a candidate applies, generate the
+      MDMA document for exactly this DSL intent and nothing else:
+
+      form#job-application[full-name*:t, email*^:e, resume*:f, portfolio:f, cover-letter:ta](action=application-submitted)
+    request: I'd like to apply for the senior product designer role you posted.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 5
+    - type: javascript
+      value: file://assertions/file-field.mjs
+    - type: javascript
+      value: file://assertions/has-sensitive.mjs
+
+# 10c. KYC — sensitive identity file uploads
+- description: "KYC identity form, all fields sensitive (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a KYC compliance assistant. When verifying a customer's identity,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      form#kyc-identity-form[full-legal-name*^:t, date-of-birth*^:d, passport-scan*^:f, proof-of-address*^:f](action=kyc-identity-verified)
+    request: I need to verify the identity of a new customer applying for an account.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: javascript
+      value: file://assertions/exact-field-count.mjs
+      config:
+        expected: 4
+    - type: javascript
+      value: file://assertions/file-field.mjs
+      config:
+        sensitive: true
+    - type: javascript
+      value: file://assertions/pii-sensitive.mjs
+    - type: javascript
+      value: file://assertions/has-required-fields.mjs
+      config:
+        min: 4
+
+# 11. Account deletion — danger button
+- description: "Danger button (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are an account management assistant. When a user wants to delete their
+      account, generate the MDMA document for exactly this DSL intent and nothing
+      else:
+
+      button#delete-account(text="Delete My Account", action=delete-account-action, variant=danger)
+    request: I want to close my account and delete all my data.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [button]
+    - type: contains
+      value: "variant: danger"
+    - type: not-contains
+      value: "type: form"
+    - type: not-contains
+      value: "type: table"
+    - type: not-contains
+      value: "type: callout"
+
+# 12. Specific component id preserved from the DSL
+- description: "Specific component id preserved (DSL custom prompt)"
+  vars:
+    customPrompt: |
+      You are a vendor onboarding assistant. When onboarding a vendor, generate
+      the MDMA document for exactly this DSL intent and nothing else (keep the id):
+
+      form#vendor-intake-q1-2026[vendor-name*:t, vendor-contact-email*^:e, tax-identifier*^:t, service-category*:s{consulting|software|hardware|logistics|other}](action=vendor-intake-submitted)
+    request: We need to onboard a new logistics vendor for Q1.
+  assert:
+    - type: javascript
+      value: file://assertions/only-components.mjs
+      config:
+        allowed: [form]
+    - type: contains
+      value: "id: vendor-intake-q1-2026"
+    - type: javascript
+      value: file://assertions/has-sensitive.mjs
diff --git a/evals/own-model/tests-dsl.mjs b/evals/own-model/tests-dsl.mjs
new file mode 100644
index 0000000..deaf814
--- /dev/null
+++ b/evals/own-model/tests-dsl.mjs
@@ -0,0 +1,54 @@
+import { readFileSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+
+/**
+ * Promptfoo test generator — the MDMA-IL DSL holdout gate (plan §6).
+ *
+ * Our model takes ONE MDMA-IL DSL intent and returns an MDMA document, so the
+ * eval feeds the DSL holdout (the 95 held-out scenarios in DSL form) as the
+ * user request and validates the MDMA output (the validate-mdma assertion in
+ * the config). This is the "does our model pass" gate.
+ *
+ * Source: the canonical holdout produced by the dataset pipeline
+ * (`gemma/dataset/data/holdout-dsl.jsonl`). That file is gitignored/generated —
+ * run `pnpm --filter @mobile-reality/mdma-evals dataset:build` if it's missing.
+ * Override the path with OWN_MODEL_HOLDOUT if you keep it elsewhere.
+ *
+ * Each holdout line is `{ messages: [system, user(DSL), assistant(MDMA)], ... }`.
+ * We surface the DSL as `vars.request` and keep the ground-truth MDMA in
+ * `vars.expected_mdma` for reference (the gate asserts validity, not equality).
+ */
+const HOLDOUT_PATH =
+  process.env.OWN_MODEL_HOLDOUT ??
+  fileURLToPath(new URL('../gemma/dataset/data/holdout-dsl.jsonl', import.meta.url));
+
+export default function () {
+  let raw;
+  try {
+    raw = readFileSync(HOLDOUT_PATH, 'utf8');
+  } catch {
+    throw new Error(
+      `Holdout DSL file not found at ${HOLDOUT_PATH}. Run \`pnpm --filter ` +
+        `@mobile-reality/mdma-evals dataset:build\` to generate it, or set OWN_MODEL_HOLDOUT.`,
+    );
+  }
+
+  return raw
+    .trim()
+    .split('\n')
+    .filter(Boolean)
+    .map((line) => {
+      const { messages, scenarioId, domainId, language } = JSON.parse(line);
+      const dsl = messages.find((m) => m.role === 'user')?.content ?? '';
+      const expected = messages.find((m) => m.role === 'assistant')?.content ?? '';
+      return {
+        description: scenarioId ?? domainId ?? 'holdout',
+        vars: {
+          request: dsl,
+          expected_mdma: expected,
+          domainId,
+          language,
+        },
+      };
+    });
+}
diff --git a/evals/own-model/tests-flows.yaml b/evals/own-model/tests-flows.yaml
new file mode 100644
index 0000000..3855fe9
--- /dev/null
+++ b/evals/own-model/tests-flows.yaml
@@ -0,0 +1,237 @@
+# Example-flow custom prompts for our model — customPrompt carries DSL.
+# DSL port of ../tests-flows.yaml (15 flows). Each flow's customPrompt keeps its
+# persona/context but prescribes the intent in DSL (the input), not an MDMA
+# blueprint or NL field list. Reuses the custom builder (prompt-custom.mjs):
+# authoring prompt + this customPrompt + NL request. Assertions carried over.
+
+# 1. Contact form
+- description: "Contact Form flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a website assistant. When a visitor wants to contact the company,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      form#contact-form[full-name*:t, email*^:e, message*:ta](action=contact-submitted)
+    request: I'd like to send a message to your support team.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 2 } }
+
+# 2. Bug report — callout + form
+- description: "Bug Report flow (DSL)"
+  vars:
+    customPrompt: |
+      You are an engineering support assistant. When a user reports a bug,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      callout#pagerduty-reminder(variant=warning)
+      form#bug-report-form[title*:t, reporter-email*^:e, severity*:s{P0|P1|P2|P3}, affected-component*:s{frontend|backend|database|auth|payments}, steps-to-reproduce*:ta, expected-behavior*:ta, actual-behavior*:ta](action=triage-bug-report)
+    request: The checkout page is showing a 500 error for all users in production.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 3. Employee onboarding — PII form
+- description: "Employee Onboarding flow (DSL)"
+  vars:
+    customPrompt: |
+      You are an HR onboarding assistant. For the new hire's first-week setup,
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      form#personal-info-form[full-name*:t, preferred-name:t, personal-email*^:e, phone^:t, emergency-contact*^:t](action=review-onboarding-info)
+    request: Hi, I'm starting next Monday in the engineering team!
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 1 } }
+
+# 4. Budget approval — warning callout + form
+- description: "Budget Approval flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a finance assistant (requests over $5,000 need director approval).
+      Generate the MDMA document for exactly this DSL intent and nothing else:
+
+      callout#budget-approval-warning(variant=warning)
+      form#budget-request-form[requester-name*:t, requester-email*^:e, department*:s{engineering|marketing|operations|finance}, requested-amount*:n, business-justification*:ta](action=route-budget-request)
+    request: I need to request $12,000 for new development servers.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 5. Customer survey — ratings
+- description: "Customer Survey flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a customer experience assistant collecting post-support feedback.
+      Generate the MDMA document for exactly this DSL intent and nothing else:
+
+      form#support-survey-form[support-ticket-id*:t, overall-satisfaction*:s{1|2|3|4|5}, response-time-rating*:s{1|2|3|4|5}, issue-resolved:c, additional-comments:ta, email^:e](action=survey-submitted)
+    request: I just finished a support call and want to leave feedback.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 1 } }
+
+# 6. Sales dashboard — callout + charts
+- description: "Sales Dashboard flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a sales analytics assistant. For the Q1 overview (total $2.34M,
+      111% of target; Jan $720K, Feb $780K, Mar $840K; by product Platform $980K,
+      Analytics $640K, Integrations $420K, Services $300K), generate the MDMA
+      document for exactly this DSL intent and nothing else (use that data):
+
+      callout#q1-summary(variant=info)
+      chart#revenue-trend(variant=line)
+      chart#product-breakdown(variant=bar)
+    request: Give me the Q1 sales overview with charts.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [chart, callout, table] } }
+    - { type: javascript, value: file://assertions/component-count.mjs, config: { min: 2 } }
+
+# 7. Incident triage — form
+- description: "Incident Triage flow (DSL)"
+  vars:
+    customPrompt: |
+      You are an incident response assistant. Generate the MDMA document for
+      exactly this DSL intent and nothing else:
+
+      form#incident-intake-form[incident-title*:t, reporter-email*^:e, severity*:s{P1|P2|P3|P4}, affected-systems*:t, description*:ta](action=triage-incident)
+    request: Our payment processing service is completely down in production.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+
+# 8. Feature request — info callout + form
+- description: "Feature Request flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a product operations assistant (5-day review process). Generate the
+      MDMA document for exactly this DSL intent and nothing else:
+
+      callout#review-process(variant=info)
+      form#feature-request-form[feature-title*:t, requester-name*:t, requesting-team*:s{engineering|sales|cs|marketing|product}, priority*:s{critical|high|medium|low}, description*:ta, primary-use-case*:ta](action=evaluate-feature-request)
+    request: We need a bulk export feature for our enterprise customers.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 9. Sprint retro — callout + charts
+- description: "Sprint Retro flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a scrum master assistant. For Sprint 24 (velocity 42 of 45 target;
+      history 38,44,41,43,47; bugs Auth 5, API 3, Frontend 7, DB 2, CI/CD 1),
+      generate the MDMA document for exactly this DSL intent and nothing else:
+
+      callout#sprint-info(variant=info)
+      chart#velocity-trend(variant=line)
+      chart#bug-distribution(variant=bar)
+    request: Let's start the Sprint 24 retrospective.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [chart, callout, form] } }
+    - { type: javascript, value: file://assertions/component-count.mjs, config: { min: 2 } }
+
+# 10. Order tracking — callout + table + tasklist
+- description: "Order Tracking flow (DSL)"
+  vars:
+    customPrompt: |
+      You are an e-commerce customer service assistant. For order ORD-2024-78542
+      (in transit, ETA Mar 22; items Wireless Keyboard, USB-C Hub x2, Monitor
+      Stand, Cable Kit), generate the MDMA document for exactly this DSL intent
+      and nothing else:
+
+      callout#order-status(variant=info)
+      table#order-items
+      tasklist#delivery-milestones
+    request: Where is my order ORD-2024-78542?
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [table, callout, tasklist, chart, button] } }
+    - { type: javascript, value: file://assertions/component-count.mjs, config: { min: 2 } }
+
+# 11. Change management — warning callout + form
+- description: "Change Management flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a release management assistant (SOX/ISO compliance). Generate the
+      MDMA document for exactly this DSL intent and nothing else:
+
+      callout#risk-assessment-warning(variant=warning)
+      form#change-request-form[change-request-id*:t, jira-ticket*:t, change-title*:t, change-type*:s{standard|normal|emergency}, target-environment*:s{production|staging|pre-prod}, risk-level*:s{low|medium|high|critical}, change-description*:ta, rollback-plan*:ta](action=review-change-request)
+    request: I need to deploy a database migration to production this weekend.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: warning } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 12. Customer escalation — warning callout + PII form
+- description: "Customer Escalation flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a customer support escalation assistant with SLA tracking. Generate
+      the MDMA document for exactly this DSL intent and nothing else:
+
+      callout#sla-compliance(variant=warning)
+      form#escalation-intake-form[case-id*:t, customer-name*:t, customer-email*^:e, account-id*^:t, priority*:s{P1|P2|P3|P4}, category*:s{billing|technical|account|other}, customer-sentiment*:s{angry|frustrated|neutral|satisfied}, escalation-reason*:ta](action=route-escalation)
+    request: A major enterprise customer is threatening to cancel over billing errors.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/has-sensitive.mjs }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 13. Clinical procedure — error callout + form
+- description: "Clinical Ops flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a clinical operations assistant (patient safety is top priority).
+      Generate the MDMA document for exactly this DSL intent and nothing else:
+
+      callout#safety-review(variant=error)
+      form#procedure-submission-form[procedure-title*:t, change-type*:s{new|major-revision|minor-revision|retirement}, department*:s{radiology|cardiology|surgery|oncology|general}, author-credentials*:t, risk-category*:s{low|medium|high|critical}, clinical-summary*:ta, contraindications*:ta](action=review-procedure)
+    request: I need to submit a new surgical procedure for the radiology department.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/callout-variant.mjs, config: { variant: error } }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 14. KYC case review — all-PII form
+- description: "KYC flow (DSL)"
+  vars:
+    customPrompt: |
+      You are a compliance assistant for KYC/AML identity verification. Generate
+      the MDMA document for exactly this DSL intent and nothing else:
+
+      form#kyc-case-form[case-reference-id*:t, customer-type*:s{individual|business|trust}, full-legal-name*^:t, date-of-birth*^:d, tax-identifier*^:t, email*^:e, source-of-funds*:s{employment|business|investment|inheritance|other}, risk-rating*:s{low|medium|high|prohibited}](action=kyc-review)
+    request: We have a new high-risk business entity that needs KYC verification.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: javascript, value: file://assertions/pii-sensitive.mjs }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
+
+# 15. Benefits enrollment — info callout + form with exact id
+- description: "Benefits enrollment flow, exact id (DSL)"
+  vars:
+    customPrompt: |
+      You are an employee benefits enrollment assistant (window closes Mar 31,
+      2026). Generate the MDMA document for exactly this DSL intent and nothing
+      else (keep the form id):
+
+      callout#enrollment-deadline(variant=info)
+      form#benefits-enroll-2026-spring[employee-id*:t, health-plan*:s{hmo|ppo|hdhp|waive}, dental-plan*:s{basic|premium|waive}, vision-plan*:s{standard|premium|waive}, dependents-count*:n, hsa-contribution*:n](action=review-benefits-selection)
+    request: I want to enroll in this year's health benefits.
+  assert:
+    - { type: javascript, value: file://assertions/only-components.mjs, config: { allowed: [form, callout] } }
+    - { type: contains, value: "id: benefits-enroll-2026-spring" }
+    - { type: javascript, value: file://assertions/select-has-options.mjs }
+    - { type: javascript, value: file://assertions/has-required-fields.mjs, config: { min: 3 } }
diff --git a/evals/package.json b/evals/package.json
index d0f4372..2ce06af 100644
--- a/evals/package.json
+++ b/evals/package.json
@@ -4,6 +4,13 @@
   "type": "module",
   "scripts": {
     "eval": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval; exit 0",
+    "eval:own-model": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c own-model/promptfooconfig.own-model.yaml -j 1; exit 0",
+    "eval:own-model:custom": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c own-model/promptfooconfig.own-model-custom.yaml -j 1; exit 0",
+    "eval:own-model:author": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c own-model/promptfooconfig.own-model-author.yaml -j 1; exit 0",
+    "eval:own-model:flows": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c own-model/promptfooconfig.own-model-flows.yaml -j 1; exit 0",
+    "eval:own-model:conversation": "node own-model/run-conversation.mjs; exit 0",
+    "eval:own-model:fixer": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c own-model/promptfooconfig.own-model-fixer.yaml -j 1; exit 0",
+    "eval:own-model:guidance": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c own-model/promptfooconfig.own-model-guidance.yaml -j 1; exit 0",
     "eval:custom": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c promptfooconfig.custom.yaml; exit 0",
     "eval:conversation": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c promptfooconfig.conversation.yaml; exit 0",
     "eval:prompt-builder": "PROMPTFOO_DISABLE_DATABASE=1 promptfoo eval -c promptfooconfig.prompt-builder.yaml; exit 0",
@@ -23,7 +30,14 @@
     "@mobile-reality/mdma-cli": "workspace:*",
     "@mobile-reality/mdma-prompt-pack": "workspace:*",
     "@mobile-reality/mdma-validator": "workspace:*",
+    "dotenv": "^16.4.5",
+    "openai": "^6.0.0",
     "promptfoo": "0.121.9",
-    "yaml": "^2.6.0"
+    "yaml": "^2.6.0",
+    "zod": "^3.24.0"
+  },
+  "devDependencies": {
+    "@types/node": "^22.0.0",
+    "tsx": "^4.19.0"
   }
 }
diff --git a/evals/prompt-builder.mjs b/evals/prompt-builder.mjs
index cb19519..434d7a0 100644
--- a/evals/prompt-builder.mjs
+++ b/evals/prompt-builder.mjs
@@ -8,7 +8,8 @@ import { selectMasterPrompt } from './select-prompt.mjs';
  * generate a `customPrompt` — a domain-specific prompt that uses
  * correct YAML-based MDMA examples.
  *
- * The Master Prompt is resolved from `EVAL_PROVIDER` — if a
+ * The Master Prompt is resolved from the actual provider promptfoo is calling
+ * (`context.provider.id`, falling back to `EVAL_PROVIDER`) — if a
  * model-specialized variant lives at packages/cli/src/prompts/<family>/<model>.ts,
  * it wins over the default.
  *
@@ -18,13 +19,23 @@ import { selectMasterPrompt } from './select-prompt.mjs';
  * promise is created once and cached, so the selector runs only once
  * per eval run.
  */
-const masterPromptPromise = selectMasterPrompt().then(({ prompt, source }) => {
-  console.error(`[prompt-builder] master prompt: ${source}`);
-  return prompt;
-});
+const promptByProvider = new Map();
 
-export default async function ({ vars }) {
-  const masterPrompt = await masterPromptPromise;
+function resolveMasterPrompt(providerId) {
+  if (!promptByProvider.has(providerId)) {
+    promptByProvider.set(
+      providerId,
+      selectMasterPrompt(providerId).then(({ prompt, source }) => {
+        console.error(`[prompt-builder] master prompt: ${source}`);
+        return prompt;
+      }),
+    );
+  }
+  return promptByProvider.get(providerId);
+}
+
+export default async function ({ vars, provider }) {
+  const masterPrompt = await resolveMasterPrompt(provider?.id ?? process.env.EVAL_PROVIDER);
   const escaped = masterPrompt.replaceAll('{{', '{% raw %}{{').replaceAll('}}', '}}{% endraw %}');
 
   return [
diff --git a/evals/prompt-conversation.mjs b/evals/prompt-conversation.mjs
index 19916c0..6a12f0b 100644
--- a/evals/prompt-conversation.mjs
+++ b/evals/prompt-conversation.mjs
@@ -8,19 +8,30 @@ import { selectAuthorPrompt } from './select-prompt.mjs';
  * replays any prior conversation turns from `_conversation`,
  * and appends the current user message.
  *
- * The author prompt base is resolved from `EVAL_PROVIDER` — model-specialized
- * variants under `mdma-author/<family>/<model>.ts` win over the default.
+ * The author prompt base is resolved from the actual provider promptfoo is
+ * calling (`context.provider.id`, falling back to `EVAL_PROVIDER`) —
+ * model-specialized variants under `mdma-author/<family>/<model>.ts` win over the default.
  * Selector falls back to the canonical `MDMA_AUTHOR_PROMPT` when no variant
  * matches. Resolution is deferred into a promise (no top-level await — tsx/cjs
  * forbids it) and cached so the selector runs only once per eval run.
  */
-const authorPromptPromise = selectAuthorPrompt().then(({ prompt, source }) => {
-  console.error(`[author-conversation] system prompt: ${source}`);
-  return prompt;
-});
+const promptByProvider = new Map();
 
-export default async function ({ vars }) {
-  const authorPrompt = await authorPromptPromise;
+function resolveAuthorPrompt(providerId) {
+  if (!promptByProvider.has(providerId)) {
+    promptByProvider.set(
+      providerId,
+      selectAuthorPrompt(providerId).then(({ prompt, source }) => {
+        console.error(`[author-conversation] system prompt: ${source}`);
+        return prompt;
+      }),
+    );
+  }
+  return promptByProvider.get(providerId);
+}
+
+export default async function ({ vars, provider }) {
+  const authorPrompt = await resolveAuthorPrompt(provider?.id ?? process.env.EVAL_PROVIDER);
   const systemPrompt = buildSystemPrompt({
     authorPrompt,
     customPrompt: vars.customPrompt,
diff --git a/evals/prompt-custom.mjs b/evals/prompt-custom.mjs
index 3f551c9..d2a9aa3 100644
--- a/evals/prompt-custom.mjs
+++ b/evals/prompt-custom.mjs
@@ -7,21 +7,32 @@ import { selectAuthorPrompt } from './select-prompt.mjs';
  * Like prompt.mjs, but passes `vars.customPrompt` to buildSystemPrompt()
  * so the MDMA author prompt is layered with a user-defined system prompt.
  *
- * The author prompt base is resolved from `EVAL_PROVIDER` — if a model-
- * specialized variant lives at packages/prompt-pack/src/prompts/mdma-author/
+ * The author prompt base is resolved from the actual provider promptfoo is
+ * calling (`context.provider.id`, falling back to `EVAL_PROVIDER`) — if a
+ * model-specialized variant lives at packages/prompt-pack/src/prompts/mdma-author/
  * <family>/<model>.ts, it wins over the default. The selector falls back to
  * the canonical `MDMA_AUTHOR_PROMPT` when no variant matches, so unset or
  * unrecognized providers behave exactly as before. Resolution is deferred
  * into a promise (no top-level await — promptfoo loads `.mjs` via tsx/cjs
  * which forbids it) and cached so the selector runs only once per eval run.
  */
-const authorPromptPromise = selectAuthorPrompt().then(({ prompt, source }) => {
-  console.error(`[author-custom] system prompt: ${source}`);
-  return prompt;
-});
+const promptByProvider = new Map();
 
-export default async function ({ vars }) {
-  const authorPrompt = await authorPromptPromise;
+function resolveAuthorPrompt(providerId) {
+  if (!promptByProvider.has(providerId)) {
+    promptByProvider.set(
+      providerId,
+      selectAuthorPrompt(providerId).then(({ prompt, source }) => {
+        console.error(`[author-custom] system prompt: ${source}`);
+        return prompt;
+      }),
+    );
+  }
+  return promptByProvider.get(providerId);
+}
+
+export default async function ({ vars, provider }) {
+  const authorPrompt = await resolveAuthorPrompt(provider?.id ?? process.env.EVAL_PROVIDER);
   const systemPrompt = buildSystemPrompt({
     authorPrompt,
     customPrompt: vars.customPrompt,
diff --git a/evals/prompt-fixer.mjs b/evals/prompt-fixer.mjs
index 9d5c488..002ba56 100644
--- a/evals/prompt-fixer.mjs
+++ b/evals/prompt-fixer.mjs
@@ -20,7 +20,7 @@ import { selectFixerPrompt } from './select-prompt.mjs';
  * 2. Collects remaining unfixed issues
  * 3. Sends the fixer system prompt (with variant-specific extensions) + user message
  */
-export default async function ({ vars }) {
+export default async function ({ vars, provider }) {
   // Default to single-block scope unless the test explicitly opts into
   // multi-step (variantKey: 'flow'). For single-block tests we also drop
   // the flow-ordering rule from validate() since by design each test has
@@ -32,7 +32,9 @@ export default async function ({ vars }) {
   const result = validate(vars.brokenDocument, { exclude });
   const allIssues = result.issues.filter((i) => i.severity === 'error' || i.severity === 'warning');
 
-  const { prompt: variantPrompt, source: fixerSource } = await selectFixerPrompt();
+  const { prompt: variantPrompt, source: fixerSource } = await selectFixerPrompt(
+    provider?.id ?? process.env.EVAL_PROVIDER,
+  );
   const fixerPrompt = fixerSource.startsWith('default')
     ? buildFixerPrompt(variantKey)
     : variantPrompt;
diff --git a/evals/prompt-guidance.mjs b/evals/prompt-guidance.mjs
index c2f0b45..51110b9 100644
--- a/evals/prompt-guidance.mjs
+++ b/evals/prompt-guidance.mjs
@@ -15,14 +15,24 @@ import { selectAuthorPrompt } from './select-prompt.mjs';
  * level so promptfoo includes it in the API request.
  */
 
-const systemPromptPromise = selectAuthorPrompt().then(({ prompt, source }) => {
-  console.error(`[guidance] system prompt: ${source}`);
-  const agentToolPrompt = getAgentToolPromptVariant(source).prompt;
-  return buildSystemPrompt({ authorPrompt: prompt, customPrompt: agentToolPrompt });
-});
+const promptByProvider = new Map();
 
-export default async function ({ vars }) {
-  const systemPrompt = await systemPromptPromise;
+function resolveSystemPrompt(providerId) {
+  if (!promptByProvider.has(providerId)) {
+    promptByProvider.set(
+      providerId,
+      selectAuthorPrompt(providerId).then(({ prompt, source }) => {
+        console.error(`[guidance] system prompt: ${source}`);
+        const agentToolPrompt = getAgentToolPromptVariant(source).prompt;
+        return buildSystemPrompt({ authorPrompt: prompt, customPrompt: agentToolPrompt });
+      }),
+    );
+  }
+  return promptByProvider.get(providerId);
+}
+
+export default async function ({ vars, provider }) {
+  const systemPrompt = await resolveSystemPrompt(provider?.id ?? process.env.EVAL_PROVIDER);
 
   return [
     { role: 'system', content: `{% raw %}${systemPrompt}{% endraw %}` },
diff --git a/evals/prompt.mjs b/evals/prompt.mjs
index 8d57caa..f0a2782 100644
--- a/evals/prompt.mjs
+++ b/evals/prompt.mjs
@@ -12,19 +12,32 @@ import { selectAuthorPrompt } from './select-prompt.mjs';
  * passes it through verbatim — the model sees clean `{{...}}` without any
  * template artifacts.
  *
- * The author prompt is resolved from `EVAL_PROVIDER` — if a model-specialized
- * variant lives at packages/prompt-pack/src/prompts/<family>/<model>.ts, it
- * wins over the default. Resolution is deferred into a promise (no top-level
- * await — promptfoo loads `.mjs` via tsx/cjs which forbids it) and cached so
- * the selector runs once per eval run.
+ * The author prompt is resolved from the ACTUAL provider promptfoo is calling
+ * (`context.provider.id`), falling back to `EVAL_PROVIDER` only if promptfoo
+ * doesn't supply one. This keeps the system prompt in sync with the model
+ * even when the provider is pinned in the config's `providers:` block rather
+ * than via the env var. If a model-specialized variant lives at
+ * packages/prompt-pack/src/prompts/mdma-author/<family>/<model>.ts, it wins
+ * over the default. Resolution is memoized per provider id so the selector
+ * runs once per model per eval run.
  */
-const authorPromptPromise = selectAuthorPrompt().then(({ prompt, source }) => {
-  console.error(`[author] system prompt: ${source}`);
-  return buildSystemPrompt({ authorPrompt: prompt });
-});
+const promptByProvider = new Map();
 
-export default async function ({ vars }) {
-  const systemPrompt = await authorPromptPromise;
+function resolveAuthorPrompt(providerId) {
+  if (!promptByProvider.has(providerId)) {
+    promptByProvider.set(
+      providerId,
+      selectAuthorPrompt(providerId).then(({ prompt, source }) => {
+        console.error(`[author] system prompt: ${source}`);
+        return buildSystemPrompt({ authorPrompt: prompt });
+      }),
+    );
+  }
+  return promptByProvider.get(providerId);
+}
+
+export default async function ({ vars, provider }) {
+  const systemPrompt = await resolveAuthorPrompt(provider?.id ?? process.env.EVAL_PROVIDER);
 
   return [
     { role: 'system', content: `{% raw %}${systemPrompt}{% endraw %}` },
diff --git a/evals/promptfooconfig.custom.yaml b/evals/promptfooconfig.custom.yaml
index 73d02b3..f0b20d8 100644
--- a/evals/promptfooconfig.custom.yaml
+++ b/evals/promptfooconfig.custom.yaml
@@ -28,6 +28,6 @@ defaultTest:
     - type: javascript
       value: file://assertions/validate-mdma.mjs
       config:
-        exclude: [flow-ordering]
+        exclude: [flow-ordering, html-tags]
 
 tests: tests-custom-prompt.yaml
diff --git a/evals/promptfooconfig.flows.yaml b/evals/promptfooconfig.flows.yaml
index 04aef78..9d07dd2 100644
--- a/evals/promptfooconfig.flows.yaml
+++ b/evals/promptfooconfig.flows.yaml
@@ -27,5 +27,7 @@ defaultTest:
   assert:
     - type: javascript
       value: file://assertions/validate-mdma.mjs
+      config:
+        exclude: [html-tags]
 
 tests: tests-flows.yaml
diff --git a/evals/promptfooconfig.isolated.yaml b/evals/promptfooconfig.isolated.yaml
index 104e9fb..2d98bf0 100644
--- a/evals/promptfooconfig.isolated.yaml
+++ b/evals/promptfooconfig.isolated.yaml
@@ -23,6 +23,6 @@ defaultTest:
     - type: javascript
       value: file://assertions/validate-mdma.mjs
       config:
-        exclude: [flow-ordering]
+        exclude: [flow-ordering, html-tags]
 
 tests: tests-isolated.yaml
diff --git a/evals/promptfooconfig.yaml b/evals/promptfooconfig.yaml
index 4729fa5..3eae67d 100644
--- a/evals/promptfooconfig.yaml
+++ b/evals/promptfooconfig.yaml
@@ -37,7 +37,7 @@ defaultTest:
     - type: javascript
       value: file://assertions/validate-mdma.mjs
       config:
-        exclude: [flow-ordering]
+        exclude: [flow-ordering, html-tags]
 
 
 tests: tests.yaml
diff --git a/packages/cli/src/prompts/google/_shared.ts b/packages/cli/src/prompts/google/_shared.ts
new file mode 100644
index 0000000..4ef8803
--- /dev/null
+++ b/packages/cli/src/prompts/google/_shared.ts
@@ -0,0 +1,480 @@
+/**
+ * Shared content for Google Master Prompt variants.
+ *
+ * Each variant (gemma.ts, …) imports `BASE_HEADER` + `BASE_FOOTER` (the
+ * byte-identical scaffolding) and a chosen subset of `EXAMPLE_*` blocks, then
+ * composes its `MASTER_PROMPT_<MODEL>` via template-literal interpolation.
+ *
+ * Block content is duplicated from `anthropic/_shared.ts` rather than imported
+ * — same convention as the author prompts' vendor `_shared.ts` files: each
+ * vendor folder stays self-contained, so a Google-specific tweak here can't
+ * affect Anthropic variants.
+ *
+ * The `_` filename prefix is recognized by `evals/select-prompt.mjs` and
+ * skipped during variant discovery, so this file never gets matched against
+ * a model id.
+ */
+
+export const BASE_HEADER = `You are an expert MDMA prompt engineer. Your job is to create **custom prompts** that guide AI models to generate correct, domain-specific MDMA interactive documents.
+
+For every form defined in the user's configuration, your generated custom prompt includes a complete \`\`\`mdma fenced YAML block showing that form. The downstream AI uses these blocks as templates — a prose description of the fields cannot replace them.
+
+## Context
+
+MDMA (Markdown Document with Mounted Applications) extends Markdown with interactive components defined in fenced \`mdma\` code blocks. **MDMA components use YAML syntax inside the fenced blocks — never JSON.** Users install MDMA libraries in their apps and use \`buildSystemPrompt({ customPrompt })\` to configure their AI chat. The \`buildSystemPrompt\` function automatically prepends the full MDMA specification (all component types, binding syntax, authoring rules). Your output is the \`customPrompt\` that layers on top.
+
+**Your output will be concatenated AFTER the full MDMA spec.** Therefore you should not:
+- Repeat the MDMA component schemas (already in the spec)
+- Repeat the base authoring rules (unique IDs, sensitive flags, etc.)
+- Include the self-check checklist (already provided)
+
+**Your output should:**
+- Define the domain context and purpose
+- Specify which components to use and when
+- Define **conversation flow** — a multi-step sequence describing when to generate MDMA components at each stage (e.g., Step 1: show form on keyword, Step 2: show approval gate after form submission)
+- Provide domain-specific examples showing realistic content
+- Define business rules, validation constraints, and workflow logic
+- Specify which fields should be marked as sensitive
+- Define the expected document structure and flow
+
+## What You Receive
+
+The user provides a configuration describing their needs:
+- **Domain**: The business domain (e.g., finance, healthcare, engineering)
+- **Description**: What the flow/document should accomplish
+- **Selected components**: Which of the 9 MDMA types to use
+- **Component configurations**: Field definitions, options, roles, etc.
+- **Business rules**: Free-text constraints and requirements
+- **Conversation flow**: An ordered list of steps, each with a trigger condition (immediate, keyword, form-submit, contextual) and which components to render at that point
+
+## Required Fields per Component
+
+Every \`\`\`mdma block must include all required fields for its type. Missing required fields cause validation errors.
+
+| Component       | Required fields (besides \`id\` and \`type\`)                |
+|-----------------|--------------------------------------------------------------|
+| form            | \`fields\` (array, each with \`name\`, \`type\`, \`label\`), \`onSubmit\` (action ID — renders submit button) |
+| callout         | \`content\`                                                  |
+| button          | \`text\`                                                     |
+| approval-gate   | \`title\`                                                    |
+| tasklist        | \`items\` (array, each with \`id\` and \`text\`)            |
+| table           | \`columns\` (array, each with \`key\` and \`header\`), \`data\` |
+| chart           | \`data\` (pipe string: \`"Header1, Header2\\nVal1, Val2"\`) |
+| webhook         | \`url\`, \`trigger\`                                         |
+| thinking        | \`content\`                                                  |
+
+Every form includes \`onSubmit\` with a descriptive action ID (e.g., \`onSubmit: submit-kyc-form\`). Without it, the form renders without a submit button.
+
+Select fields use \`options\` as an array of objects: \`- label: "Display" value: key\`, not flat strings.
+Approval gates use \`allowedRoles\` (not \`roles\`) for role restrictions.
+
+## Output Format
+
+Generate a clean, well-structured custom prompt in plain text. Structure it as:
+
+1. **Role & Domain** — Set the domain context ("You are assisting with [domain] workflows...")
+2. **Conversation Flow** — Define the multi-step conversation flow. For each step, specify:
+   - What triggers it (user keyword, form submission, contextual condition, or immediate)
+   - Which components to render
+   - How the AI should respond at this step
+   The AI follows these steps in order — after completing one step, wait for the appropriate trigger before moving to the next. If the flow has multiple steps, do not show all components at once.
+3. **Document Purpose** — What the generated document should achieve
+4. **Component Instructions** — For each selected component, provide:
+   - When to include it
+   - What content/fields it should have
+   - Domain-specific field names and labels
+   - Which fields are sensitive (PII)
+5. **Workflow Logic** — How components relate to each other (bindings, action triggers, approval flows)
+6. **Concrete MDMA Examples** — For every form in the configuration, include a \`\`\`mdma fenced YAML block showing that form with all required fields and an \`onSubmit\` action. If the configuration has 2 forms, include 2 blocks. The downstream AI uses these as templates.
+7. **Constraints** — Things the AI must or must not do in this domain
+
+## Examples
+
+The examples below show the input configuration and the expected custom-prompt output. Match this style.`;
+
+export const BASE_FOOTER = `## Component Scope Rule
+
+Only include components that appear under "Selected Components" in the user's configuration. The description and business rules may mention other component types as context — treat those mentions as background, not as a component wishlist. If the user selected only "form" and "thinking", the output should only contain instructions and examples for form and thinking.
+
+## Important Rules
+
+1. **Be specific** — Use real field names, labels, and options relevant to the domain. Avoid generic placeholders.
+2. **Strict component scope** — Include only components listed under "Selected Components". If a business rule mentions "approval required" but approval-gate is not in Selected Components, reference the rule in prose without adding an approval-gate component.
+3. **Be complete** — Cover all selected components with detailed instructions.
+4. **Be concise** — The prompt should be focused and actionable, not a tutorial.
+5. **Respect the architecture** — Your output is a \`customPrompt\`, not a standalone system prompt. Avoid MDMA spec details that are already in the base prompt.
+6. **Use YAML, never JSON** — All examples in fenced \`mdma\` code blocks use YAML syntax.
+7. **Respect flow ordering** — When the configuration defines a multi-step conversation flow, the output instructs the AI to present components in the defined step order, waiting for each trigger before proceeding.
+8. **Include MDMA examples** — For every form in the configuration, include a \`\`\`mdma fenced YAML block. If there are 2 forms across 2 steps, include 2 form blocks.`;
+
+export const EXAMPLE_SUPPORT_INTAKE = `<example>
+**Input configuration**
+Flow Name: support-intake
+Domain: customer support
+Description: Collect support ticket details immediately when the user reports an issue
+Selected Components: form, thinking
+Conversation Flow:
+- Step 1 (immediate): thinking block + intake form
+
+**Generated custom prompt**
+
+You are assisting with customer support intake workflows. When the user mentions a problem, error, or asks for help, immediately respond with a thinking block analyzing the issue and a form to collect ticket details.
+
+### Component Instructions
+
+**Thinking** — Briefly note the issue category and any urgency signals before presenting the form.
+
+**Form** — Single intake form. Mark contact_email as sensitive.
+
+### Example
+
+\`\`\`mdma
+type: thinking
+id: support-analysis
+status: done
+collapsed: true
+content: Customer reported an issue. Gathering ticket details for the support team to triage.
+\`\`\`
+
+\`\`\`mdma
+type: form
+id: support-intake-form
+onSubmit: submit-support-ticket
+fields:
+  - name: customer_name
+    type: text
+    label: Your Name
+    required: true
+  - name: contact_email
+    type: email
+    label: Contact Email
+    required: true
+    sensitive: true
+  - name: issue_category
+    type: select
+    label: Issue Type
+    required: true
+    options:
+      - label: Billing
+        value: billing
+      - label: Technical
+        value: technical
+      - label: Account
+        value: account
+      - label: Other
+        value: other
+  - name: description
+    type: textarea
+    label: Describe your issue
+    required: true
+\`\`\`
+</example>`;
+
+export const EXAMPLE_EXPENSE_APPROVAL = `<example>
+**Input configuration**
+Flow Name: expense-approval
+Domain: finance
+Description: Submit expense report, then route to manager for approval
+Selected Components: form, approval-gate
+Conversation Flow:
+- Step 1 (immediate): expense form
+- Step 2 (form-submit): approval gate
+
+**Generated custom prompt**
+
+You are assisting with expense reporting workflows in the finance domain.
+
+### Conversation Flow
+
+**Step 1 — Submit Expense**
+When the user wants to submit an expense, immediately respond with the expense form. Do not show the approval gate yet.
+
+**Step 2 — Manager Review**
+After the user submits the expense form, show the manager approval gate. Include a thinking block analyzing the expense category and amount.
+
+### Examples
+
+\`\`\`mdma
+type: form
+id: expense-form
+onSubmit: submit-expense
+fields:
+  - name: description
+    type: text
+    label: Expense Description
+    required: true
+  - name: amount
+    type: number
+    label: Amount (USD)
+    required: true
+  - name: category
+    type: select
+    label: Category
+    required: true
+    options:
+      - label: Travel
+        value: travel
+      - label: Meals
+        value: meals
+      - label: Software
+        value: software
+      - label: Office Supplies
+        value: office
+  - name: receipt_notes
+    type: textarea
+    label: Receipt Notes
+    required: false
+\`\`\`
+
+\`\`\`mdma
+type: approval-gate
+id: expense-manager-approval
+title: Manager Expense Approval
+allowedRoles:
+  - manager
+  - finance-lead
+requiredApprovers: 1
+requireReason: false
+\`\`\`
+</example>`;
+
+export const EXAMPLE_KYC = `<example>
+**Input configuration**
+Flow Name: kyc-verification
+Domain: financial services
+Description: Verify customer identity for account opening, with PEP warning callout when applicable
+Selected Components: form, thinking, callout
+Conversation Flow:
+- Step 1 (keyword "verify identity"): thinking + optional PEP callout + applicant form
+Business Rules: Government ID number, date of birth, residential address, email, and phone are sensitive.
+
+**Generated custom prompt**
+
+You are assisting with KYC (Know Your Customer) verification workflows in the financial services domain. Mark all PII fields as sensitive: ID number, date of birth, address, email, phone.
+
+### Conversation Flow
+
+**Step 1 — Collect Applicant Data**
+When the user says "verify identity", "start KYC", or "new customer", respond with a thinking block analyzing the case, a PEP warning callout if applicable, and the applicant form.
+
+### Examples
+
+\`\`\`mdma
+type: thinking
+id: kyc-case-analysis
+status: done
+collapsed: true
+content: Applicant verification request received. Standard checks: ID document, address proof, sanctions screening.
+\`\`\`
+
+\`\`\`mdma
+type: callout
+id: pep-warning
+variant: warning
+title: PEP Flag Detected
+content: This applicant has been flagged as a Politically Exposed Person. Enhanced due diligence is required.
+dismissible: false
+\`\`\`
+
+\`\`\`mdma
+type: form
+id: kyc-applicant-form
+onSubmit: submit-kyc-application
+fields:
+  - name: full_name
+    type: text
+    label: Full Legal Name
+    required: true
+  - name: date_of_birth
+    type: date
+    label: Date of Birth
+    required: true
+    sensitive: true
+  - name: id_type
+    type: select
+    label: ID Document Type
+    required: true
+    options:
+      - label: Passport
+        value: passport
+      - label: "Driver's License"
+        value: drivers-license
+      - label: National ID
+        value: national-id
+  - name: id_number
+    type: text
+    label: Government ID Number
+    required: true
+    sensitive: true
+  - name: residential_address
+    type: textarea
+    label: Residential Address
+    required: true
+    sensitive: true
+\`\`\`
+</example>`;
+
+export const EXAMPLE_ORDER_FULFILLMENT = `<example>
+**Input configuration**
+Flow Name: order-fulfillment
+Domain: e-commerce
+Description: Customer places an order, then warehouse confirms shipping after order submission. Two distinct forms across two steps.
+Selected Components: form, thinking
+Conversation Flow:
+- Step 1 (immediate): order form
+- Step 2 (form-submit): shipping confirmation form
+
+**Generated custom prompt**
+
+You are assisting with order fulfillment workflows in the e-commerce domain. The flow has two distinct forms — collect order details first, then collect shipping confirmation after the order is submitted. Mark customer_email as sensitive.
+
+### Conversation Flow
+
+**Step 1 — Capture Order**
+When the user wants to place an order, immediately respond with a thinking block analyzing the order context and the order form. Do not show the shipping form yet.
+
+**Step 2 — Confirm Shipping**
+After the user submits the order form, respond with the shipping confirmation form. The two forms remain separate — do not merge their fields into one.
+
+### Examples
+
+\`\`\`mdma
+type: form
+id: order-form
+onSubmit: submit-order
+fields:
+  - name: customer_email
+    type: email
+    label: Customer Email
+    required: true
+    sensitive: true
+  - name: product_sku
+    type: text
+    label: Product SKU
+    required: true
+  - name: quantity
+    type: number
+    label: Quantity
+    required: true
+\`\`\`
+
+\`\`\`mdma
+type: form
+id: shipping-confirmation
+onSubmit: confirm-shipping
+fields:
+  - name: tracking_number
+    type: text
+    label: Tracking Number
+    required: true
+  - name: carrier
+    type: select
+    label: Carrier
+    required: true
+    options:
+      - label: USPS
+        value: usps
+      - label: UPS
+        value: ups
+      - label: FedEx
+        value: fedex
+  - name: estimated_delivery
+    type: date
+    label: Estimated Delivery
+    required: true
+\`\`\`
+</example>`;
+
+export const EXAMPLE_CONSULTATION_BOOKING = `<example>
+**Input configuration**
+Flow Name: consultation-booking
+Domain: scheduling
+Description: Show booking form on conversation start — no keyword trigger, no preliminary question
+Selected Components: form
+Conversation Flow:
+- Step 1 (immediate, on conversation start): booking form
+
+**Generated custom prompt**
+
+You are assisting with consultation booking. Show the booking form in the very first message of the conversation — do not wait for a keyword, do not ask a greeting question, and do not include any conditional fallback like "or when the user says...". The trigger is unconditional: the form appears on conversation start.
+
+### Conversation Flow
+
+**Step 1 — Booking (immediate, first message, unconditional)**
+On the very first message of the conversation, respond with the booking form. There is no keyword trigger and no condition to evaluate — the form is the opening message.
+
+### Example
+
+\`\`\`mdma
+type: form
+id: consultation-booking-form
+onSubmit: submit-booking
+fields:
+  - name: full_name
+    type: text
+    label: Full Name
+    required: true
+  - name: contact_email
+    type: email
+    label: Contact Email
+    required: true
+    sensitive: true
+  - name: appointment_type
+    type: select
+    label: Appointment Type
+    required: true
+    options:
+      - label: Initial Consultation
+        value: initial
+      - label: Follow-up
+        value: followup
+      - label: Discovery Call
+        value: discovery
+  - name: preferred_date
+    type: date
+    label: Preferred Date
+    required: true
+\`\`\`
+</example>`;
+
+export const EXAMPLE_CUSTOMER_FEEDBACK = `<example>
+**Input configuration**
+Flow Name: customer-feedback
+Domain: customer success
+Description: Collect post-interaction feedback in a single step
+Selected Components: form
+Conversation Flow:
+- Step 1 (immediate): feedback form
+
+**Generated custom prompt**
+
+You are assisting with customer feedback collection in the customer success domain. When the user is ready to give feedback, immediately respond with the feedback form.
+
+### Example
+
+\`\`\`mdma
+type: form
+id: feedback-form
+onSubmit: submit-feedback
+fields:
+  - name: rating
+    type: select
+    label: How was your experience?
+    required: true
+    options:
+      - label: Excellent
+        value: 5
+      - label: Good
+        value: 4
+      - label: Okay
+        value: 3
+      - label: Poor
+        value: 2
+      - label: Bad
+        value: 1
+  - name: comments
+    type: textarea
+    label: Additional comments
+    required: false
+\`\`\`
+</example>`;
diff --git a/packages/cli/src/prompts/google/gemma.ts b/packages/cli/src/prompts/google/gemma.ts
new file mode 100644
index 0000000..a2a6e1e
--- /dev/null
+++ b/packages/cli/src/prompts/google/gemma.ts
@@ -0,0 +1,79 @@
+/**
+ * Master Prompt — Google Gemma variant (whole family).
+ *
+ * Covers Google's open-weights Gemma models via OpenRouter — Gemma 4
+ * (26B-a4b / 31B) and Gemma 3n (4B). Three Gemma-specific adjustments over the
+ * default `MASTER_PROMPT`:
+ *
+ * 1. Multi-shot examples. Like the Haiku variant, a small open model leans far
+ *    more on worked examples than on instructions — with too few it slips into
+ *    JSON-shaped example blocks. The prompt-builder eval reproduced exactly
+ *    this: on the 2-step KYC flow Gemma emitted JSON instead of YAML in 2 of 4
+ *    example blocks. The five worked examples (all YAML) anchor the format.
+ *
+ * 2. A trailing flow-fidelity block. Gemma tends to broaden the specified
+ *    trigger conditions — e.g. on the single-form KYC eval case it appended
+ *    "or expresses a desire to begin a new verification process" to the listed
+ *    keywords, which the faithful-reproduction rubric penalizes. The block
+ *    instructs it to reproduce the configured triggers verbatim.
+ *
+ * 3. A trailing YAML-enforcement block. Per Google's Vertex guide, the most
+ *    critical negative constraint goes LAST, so the "every mdma block is a YAML
+ *    mapping, never JSON" rule is repeated as the final line.
+ *
+ * Composition:
+ *   BASE_HEADER
+ *     + <examples> (5 YAML worked examples — KYC included, the failing case)
+ *     + BASE_FOOTER
+ *     + FLOW_FIDELITY_BLOCK      (negative constraint)
+ *     + YAML_ENFORCEMENT_BLOCK   (negative constraint — last)
+ */
+
+import {
+  BASE_FOOTER,
+  BASE_HEADER,
+  EXAMPLE_CONSULTATION_BOOKING,
+  EXAMPLE_EXPENSE_APPROVAL,
+  EXAMPLE_KYC,
+  EXAMPLE_ORDER_FULFILLMENT,
+  EXAMPLE_SUPPORT_INTAKE,
+} from './_shared.js';
+
+/**
+ * Single-use, Gemma-specific. Kept inline (not in `_shared.ts`) since no other
+ * variant composes it. Targets the observed failure mode: example `mdma` blocks
+ * serialized as JSON instead of YAML.
+ */
+const YAML_ENFORCEMENT_BLOCK = `## Final Rule — Examples Are Always YAML
+
+Every \`\`\`mdma example block you write MUST be a YAML mapping: the first line is a \`key: value\` pair (e.g. \`type: form\`), fields are indented with spaces, and lists use \`-\`. Never emit a JSON object — no \`{\`, \`}\`, \`"key":\`, or comma-separated entries inside an \`\`\`mdma block. A block that begins with \`{\` is invalid and will be rejected. Write \`type: form\` on its own line, not \`{"type": "form"}\`.`;
+
+/**
+ * Single-use, Gemma-specific. Kept inline per the variant convention. Targets
+ * Gemma's tendency to embellish the configured conversation flow with extra,
+ * inferred trigger conditions instead of reproducing the exact ones given.
+ */
+const FLOW_FIDELITY_BLOCK = `## Reproduce the Configured Flow Exactly
+
+Use the EXACT trigger conditions listed in the configuration's conversation flow — the specific keywords or events given, and nothing more. Do NOT broaden a keyword trigger with paraphrases or inferred intent (e.g. do not append "or when the user expresses a desire to…" next to the listed keywords). If the flow says a form appears on "start KYC review" / "verify identity", those two phrases are the only triggers — reproduce them verbatim, do not generalize them.`;
+
+export const MASTER_PROMPT_GEMMA = `${BASE_HEADER}
+
+<examples>
+${EXAMPLE_CONSULTATION_BOOKING}
+
+${EXAMPLE_SUPPORT_INTAKE}
+
+${EXAMPLE_EXPENSE_APPROVAL}
+
+${EXAMPLE_ORDER_FULFILLMENT}
+
+${EXAMPLE_KYC}
+</examples>
+
+${BASE_FOOTER}
+
+${FLOW_FIDELITY_BLOCK}
+
+${YAML_ENFORCEMENT_BLOCK}
+`;
diff --git a/packages/parser/src/transform/parse-yaml.ts b/packages/parser/src/transform/parse-yaml.ts
index 71d3f0e..9a81a09 100644
--- a/packages/parser/src/transform/parse-yaml.ts
+++ b/packages/parser/src/transform/parse-yaml.ts
@@ -7,10 +7,17 @@ export type ParseYamlResult =
   | { ok: true; data: Record<string, unknown> }
   | { ok: false; error: MdmaParseError };
 
+// YAML indicators that cannot begin a plain scalar. AI models emit values like
+// `unit: %`, `range: > 40 mg/dL`, `note: @home`, which crash the parser, so we quote
+// them. NOTE(review): also matches `|`/`>` block headers, `&`/`*`/`!` — confirm intended.
+const LEADING_INDICATOR = /^[%@`!&*>|?,]/;
+
 /**
- * Auto-quote plain scalar values that contain ": " (colon-space) — a common
- * issue with AI-generated YAML where values like `label: Example: Revenue`
- * are interpreted as nested mappings instead of a simple string value.
+ * Auto-quote plain scalar values that AI-generated YAML commonly gets wrong:
+ *  - values containing ": " (colon-space), e.g. `label: Example: Revenue`,
+ *    which YAML reads as a nested mapping instead of a string;
+ *  - values that start with a YAML indicator character, e.g. `unit: %`, which
+ *    is a parse error (`%` is a directive indicator).
  *
  * Only affects simple `key: value` lines (not block scalars, sequences, etc.).
  */
@@ -53,8 +60,9 @@ function sanitizeYaml(source: string): string {
     // Skip sequence items (e.g. `  - item`)
     if (indent && /^\s*-/.test(line)) continue;
 
-    // If the value contains ": " (colon-space), quote it
-    if (value.includes(': ')) {
+    // Quote values that contain ": " (colon-space) or start with a YAML
+    // indicator character — both break a plain scalar.
+    if (value.includes(': ') || LEADING_INDICATOR.test(value)) {
       const escaped = value.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
       lines[i] = line.replace(/:\s+.+$/, `: "${escaped}"`);
     }
diff --git a/packages/prompt-pack/src/index.ts b/packages/prompt-pack/src/index.ts
index f2e1e45..dc95955 100644
--- a/packages/prompt-pack/src/index.ts
+++ b/packages/prompt-pack/src/index.ts
@@ -29,3 +29,4 @@ export {
   getAgentToolPromptVariant,
   type AgentToolPromptVariant,
 } from './prompts/mdma-agent/registry.js';
+export { MDMA_IL_AGENT_SYSTEM_PROMPT } from './prompts/mdma-agent/mobile-reality/mdma-il.js';
diff --git a/packages/prompt-pack/src/prompts/mdma-agent/mobile-reality/mdma-il.ts b/packages/prompt-pack/src/prompts/mdma-agent/mobile-reality/mdma-il.ts
new file mode 100644
index 0000000..d370024
--- /dev/null
+++ b/packages/prompt-pack/src/prompts/mdma-agent/mobile-reality/mdma-il.ts
@@ -0,0 +1,41 @@
+/**
+ * Agentic system prompt for our own MDMA-IL model (mdma-26b) in tool-calling /
+ * chat mode — used by the demo's "own model" provider.
+ *
+ * Unlike the strict v3 DSL→MDMA generation prompt (mdma-author/mobile-reality),
+ * this drives a *conversation*: the model chats and calls `generate_mdma` to
+ * render UI. It is written per **Gemma 4** prompting guidance — direct
+ * imperative instructions (Gemma 4 has strong instruction adherence), Markdown
+ * structure (Gemma prefers Markdown over XML), role → when-to-call → format →
+ * output-discipline.
+ *
+ * The output-discipline section is deliberate: with the generic author prompt
+ * (which mandates "always include a thinking block") the model leaked a literal
+ * `<thinking>…</thinking>` block into the rendered document. Gemma 4 can emit a
+ * thought channel even when thinking is disabled, so we forbid raw
+ * `<thinking>`/`<think>` text here and offer the proper MDMA `thinking`
+ * component as the only sanctioned way to show reasoning.
+ */
+export const MDMA_IL_AGENT_SYSTEM_PROMPT = `You are an interactive-UI assistant. You chat with the user and, when they ask you to create or update an interactive document or UI, you call the \`generate_mdma\` tool to render it.
+
+## When to call generate_mdma
+Call it whenever the user asks to build, create, design, or update a form, table, checklist, chart, callout, approval gate, button, or any interactive component. For greetings, questions about your capabilities, explanations, or other conversation, reply normally and do NOT call the tool.
+
+## What to pass as \`document\`
+A Markdown document where each interactive component is a fenced \`\`\`mdma YAML block. Rules:
+- Each \`\`\`mdma block is exactly ONE component as top-level YAML keys (type, id, …). Never wrap a component in a "components:" array.
+- Types: form, button, tasklist, table, callout, approval-gate, webhook, chart.
+- form: top-level "onSubmit: <action-id>"; "fields" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need "options" (list of {label, value}); mark every PII field (name, email, phone, address, SSN, date-of-birth) "sensitive: true".
+- button: "text" + "onAction". tasklist: "items" (list of {id, text}). table: "columns" (key/header) + "data" rows. callout: "content" + variant ∈ info|warning|error|success. approval-gate: "title". webhook: "url" + "trigger". chart: "label" (never "title") + "data: |" CSV (header line then rows) + variant ∈ line|bar|area|pie.
+- Forms use "onSubmit", buttons "onAction", webhooks "trigger" — never a bare "action" key.
+- At most ONE interactive component per document; non-interactive components (callout, table, chart) may accompany it. Fill in realistic values the request omits.
+
+## Output discipline
+Put ONLY the Markdown document in \`document\`, and do NOT wrap it in \`\`\`markdown fences. Do NOT emit raw \`<thinking>\`, \`<think>\`, or \`<|think|>\` tags anywhere — those are not MDMA.
+Reasoning belongs ONLY inside a \`\`\`mdma \`type: thinking\` block — NOWHERE else. Do NOT write a reasoning heading or paragraph in the document body: no \`# Thinking\`, \`# Thinking process\`, \`## Reasoning\`, "Thinking process:", "My plan:", or any similar title/prose. The Markdown outside the \`\`\`mdma fences is for user-facing document content (a short title or sentence at most), never for your reasoning.
+
+## Show reasoning as a thinking component
+You may think out loud — but your reasoning is rendered as a **component**, never as chat text. When you want to reason, include a \`type: thinking\` block in the \`document\`. It is non-interactive, so it may accompany the form/table/etc. in the same \`generate_mdma\` call (put it first). Keys: \`type: thinking\`, \`id\`, \`content: <your reasoning>\`, \`status: thinking\` while still working or \`done\` when finished, \`collapsed: true\`. Keep your visible chat message to warm, plain-language conversation for the user (a sentence or two) — never put reasoning, planning, "Thinking:", "Wait,", "Correction:", or step narration in the chat text.
+
+## Reasoning discipline
+Each user message is a complete instruction — respond to it, then stop. Never reason about whether it is "your turn" or whether another message has arrived. Keep any reasoning to at most a few sentences; do not re-verify completed steps, and never repeat a token or phrase.`;
diff --git a/packages/prompt-pack/src/prompts/mdma-author/google/gemma.ts b/packages/prompt-pack/src/prompts/mdma-author/google/gemma.ts
new file mode 100644
index 0000000..5ce7405
--- /dev/null
+++ b/packages/prompt-pack/src/prompts/mdma-author/google/gemma.ts
@@ -0,0 +1,56 @@
+/**
+ * MDMA Author Prompt — Google Gemma variant (whole family).
+ *
+ * Covers Google's open-weights Gemma models served via OpenRouter — Gemma 4
+ * (26B-a4b / 31B) and the smaller Gemma 3n (4B). They share Gemini's
+ * Markdown-over-XML prompting conventions, so this variant reuses the Gemini
+ * `google/_shared.ts` blocks and the same Gemini-native ordering (behavioral
+ * directive at the top, negative constraints at the end — per Google's Vertex
+ * prompting guide).
+ *
+ * Defensive posture mirrors the smallest Gemini tier
+ * (`gemini-3.1-flash-lite-preview.ts`) and the other vendors' small-model
+ * variants (`openai/gpt-4.1-nano.ts`, `anthropic/haiku.ts`): a small open
+ * model is the most likely to drop a closing fence, emit unsolicited
+ * components, or use numeric select values, so all three universal
+ * failure-mode blocks are bundled pre-emptively.
+ *
+ * Composition (Gemini-native ordering):
+ *
+ *   BASE_OPENING (role)
+ *     + ## Output Format          (behavioral directive — top, anchor)
+ *     + BASE_BODY (the spec)
+ *     + ## Fence Closing          (negative constraint — end)
+ *     + ## Scope Discipline       (negative constraint — end)
+ *     + ## Select Option Values   (negative constraint — end)
+ *     + BASE_CHECKLIST            (## Self-Check Checklist — end)
+ *
+ * Routing: substring match on `gemma` matches every Gemma model id
+ * (`google/gemma-4-26b-a4b-it`, `google/gemma-4-31b-it`, `google/gemma-3n-e4b-it`,
+ * and their `:free` tiers), so this single variant covers the whole family.
+ * `gemma` is not a substring of any Gemini model id (`gemini-*`), so there's
+ * no cross-match with the Gemini variants in this folder.
+ */
+
+import { BASE_BODY, BASE_CHECKLIST, BASE_OPENING } from '../_shared.js';
+import {
+  FENCE_CLOSING_BLOCK,
+  OUTPUT_FORMAT_BLOCK,
+  SCOPE_DISCIPLINE_BLOCK,
+  SELECT_OPTIONS_BLOCK,
+} from './_shared.js';
+
+export const MDMA_AUTHOR_PROMPT_GEMMA = `${BASE_OPENING}
+
+${OUTPUT_FORMAT_BLOCK}
+
+${BASE_BODY}
+
+${FENCE_CLOSING_BLOCK}
+
+${SCOPE_DISCIPLINE_BLOCK}
+
+${SELECT_OPTIONS_BLOCK}
+
+${BASE_CHECKLIST}
+`;
diff --git a/packages/prompt-pack/src/prompts/mdma-author/mobile-reality/mdma-il.ts b/packages/prompt-pack/src/prompts/mdma-author/mobile-reality/mdma-il.ts
new file mode 100644
index 0000000..b5a2133
--- /dev/null
+++ b/packages/prompt-pack/src/prompts/mdma-author/mobile-reality/mdma-il.ts
@@ -0,0 +1,114 @@
+/**
+ * MDMA Author Prompt — Mobile Reality's own MDMA-IL DSL model.
+ *
+ * The model reads a single **MDMA-IL DSL intent** (the grammar is described in
+ * the prompt itself) as the user message and returns one MDMA document. A
+ * system prompt that does NOT describe the DSL input is out-of-distribution for
+ * this model — the DSL grammar section is required, not optional.
+ *
+ * Structured per Gemma 4 prompting guidance: Role → DSL input grammar →
+ * authoring rules → worked few-shot examples (form / table / chart).
+ *
+ * Used by our hosted DSL endpoints and the own-model eval harness
+ * (`evals/own-model/`, which imports this variant via `getAuthorPromptVariant`).
+ * Registry id: `mobile-reality/mdma-il`. The endpoint also requires
+ * `temperature: 0` and `chat_template_kwargs.enable_thinking = false`.
+ */
+
+export const MDMA_AUTHOR_PROMPT_MDMA_IL = `You are an MDMA authoring engine. You read a single DSL intent — a compact, one-line-per-component description of the UI to build — and produce the corresponding MDMA (Markdown Document with Mounted Applications) components.
+
+The DSL intent to build is given in your instructions (and/or the user's message). Always generate the MDMA for that DSL — never refuse, apologize, or ask the user to provide a DSL intent. Treat any natural-language message as extra context for the DSL you were given.
+
+## DSL input — the grammar you read
+\`\`\`
+<type>#<id>[<field>, <field>, ...](<prop>, <prop>, ...)   # one component per line
+field = <name>[*][^]:<typecode>[{opt1|opt2|...}]
+        *  = required        ^  = sensitive (PII: name, email, phone, address, SSN, date-of-birth, …)
+        typecode: t=text  n=number  e=email  d=date  s=select  c=checkbox  ta=textarea  f=file
+        {a|b|c} = options for a select field
+props = text="..."  |  action=<id>  |  variant=<name>
+types: form · button · tasklist · table · callout · approval-gate · webhook · chart
+\`\`\`
+
+## Authoring rules
+- Build EXACTLY the components in the DSL intent — no more, no fewer. The DSL is the complete, final spec. Never invent a component it did not list (e.g. an extra approval-gate, button, tasklist, callout, or webhook), even when the request or surrounding context implies a larger workflow ("needs approval", "review process", etc.). Describe any such follow-up in prose only; do not emit it.
+- Each \`\`\`mdma block is exactly ONE component as top-level YAML keys (type, id, …). Never wrap a component in a "components:" array.
+- Every component has "id" and "type" (one of: form, button, tasklist, table, callout, approval-gate, webhook, chart).
+- AT MOST ONE interactive component (form, button, tasklist, approval-gate, webhook) per response; non-interactive components (callout, table, chart) may accompany it. Define a referenced component before anything that references it.
+- form: top-level "onSubmit: <action-id>"; "fields" list (each name/type/label); field type ∈ text|number|email|date|select|checkbox|textarea|file; select fields need "options" (list of {label, value}); mark every PII field "sensitive: true".
+- button: "text" + "onAction: <action-id>". tasklist: "items" list of {id, text}. table: "columns" (key/header) + "data" rows. callout: "content" + variant ∈ info|warning|error|success. approval-gate: "title". webhook: "url" + "trigger: <action-id>". chart: "label" (never "title") + "data: |" CSV (header line then rows) + variant ∈ line|bar|area|pie.
+- Forms use "onSubmit", buttons "onAction", webhooks "trigger" — never a bare "action" key.
+- Fill in realistic values the DSL omits (table rows, chart CSV, callout content, approval-gate title).
+
+## Turn and reasoning discipline
+- Each message is a complete instruction. Emit exactly one document for the current DSL intent, then stop. Never reason about whether a new turn has arrived or whether it is "your turn" to continue.
+- Do not re-verify or re-emit components from earlier turns. Keep any thinking block to at most a few sentences. Never repeat a token or phrase.
+
+## Examples
+
+Intent: \`form#contact[full-name*:t, email*^:e](action=contact-submitted)\`
+
+\`\`\`mdma
+type: thinking
+id: planning
+status: done
+collapsed: true
+content: |
+  Contact form: a required name and a required, sensitive email; submits via contact-submitted.
+\`\`\`
+
+\`\`\`mdma
+type: form
+id: contact
+fields:
+  - name: full-name
+    type: text
+    label: "Full Name"
+    required: true
+  - name: email
+    type: email
+    label: "Email"
+    required: true
+    sensitive: true
+onSubmit: contact-submitted
+\`\`\`
+
+Intent: \`table#orders\` — invent realistic columns and rows; default to sortable/filterable tables.
+
+\`\`\`mdma
+type: table
+id: orders
+sortable: true
+filterable: true
+columns:
+  - key: order-id
+    header: "Order ID"
+    sortable: true
+  - key: customer
+    header: "Customer"
+    sortable: true
+  - key: total
+    header: "Total ($)"
+    sortable: true
+  - key: status
+    header: "Status"
+data:
+  - { order-id: "ORD-1001", customer: "Acme Inc", total: 1240.50, status: "Shipped" }
+  - { order-id: "ORD-1002", customer: "Globex", total: 880.00, status: "Pending" }
+  - { order-id: "ORD-1003", customer: "Initech", total: 2310.75, status: "Delivered" }
+\`\`\`
+
+Intent: \`chart#revenue(variant=bar)\` — invent a realistic CSV \`data\` block and a \`label\`.
+
+\`\`\`mdma
+type: chart
+id: revenue
+variant: bar
+label: "Monthly Revenue"
+data: |
+  Month, Revenue
+  Jan, 42000
+  Feb, 51000
+  Mar, 47500
+xAxis: Month
+\`\`\``;
diff --git a/packages/prompt-pack/src/prompts/mdma-author/registry.ts b/packages/prompt-pack/src/prompts/mdma-author/registry.ts
index 448a120..8af58b1 100644
--- a/packages/prompt-pack/src/prompts/mdma-author/registry.ts
+++ b/packages/prompt-pack/src/prompts/mdma-author/registry.ts
@@ -19,7 +19,9 @@ import { MDMA_AUTHOR_PROMPT_GEMINI_3_FLASH_PREVIEW } from './google/gemini-3-fla
 import { MDMA_AUTHOR_PROMPT_GEMINI_3_1_FLASH_LITE_PREVIEW } from './google/gemini-3.1-flash-lite-preview.js';
 import { MDMA_AUTHOR_PROMPT_GEMINI_3_1_PRO_PREVIEW_CUSTOMTOOLS } from './google/gemini-3.1-pro-preview-customtools.js';
 import { MDMA_AUTHOR_PROMPT_GEMINI_3_1_PRO_PREVIEW } from './google/gemini-3.1-pro-preview.js';
+import { MDMA_AUTHOR_PROMPT_GEMMA } from './google/gemma.js';
 import { MDMA_AUTHOR_PROMPT } from './default.js';
+import { MDMA_AUTHOR_PROMPT_MDMA_IL } from './mobile-reality/mdma-il.js';
 import { MDMA_AUTHOR_PROMPT_GPT_4_1 } from './openai/gpt-4.1.js';
 import { MDMA_AUTHOR_PROMPT_GPT_4_1_MINI } from './openai/gpt-4.1-mini.js';
 import { MDMA_AUTHOR_PROMPT_GPT_4_1_NANO } from './openai/gpt-4.1-nano.js';
@@ -107,6 +109,20 @@ export const AUTHOR_PROMPT_VARIANTS: AuthorPromptVariant[] = [
       "Gemini-native framing — Markdown headers (no XML), constraints placed at the END per Google's Gemini 3 prompting guide.",
     prompt: MDMA_AUTHOR_PROMPT_GEMINI_3_1_PRO_PREVIEW,
   },
+  {
+    id: 'google/gemma',
+    label: 'Google — Gemma',
+    description:
+      "Open-weights Gemma family (Gemma 4 26B-a4b / 31B, Gemma 3n 4B). Gemini-native Markdown framing with all defensive blocks bundled (fence closing, scope discipline, string select values) for the open-model tier.",
+    prompt: MDMA_AUTHOR_PROMPT_GEMMA,
+  },
+  {
+    id: 'mobile-reality/mdma-il',
+    label: 'Mobile Reality — MDMA-IL model',
+    description:
+      'Our self-hosted MDMA-IL DSL models (E4B mdma-v3 / 31B mdma-31b). Reads an MDMA-IL DSL intent (grammar + worked examples are in the prompt) and returns MDMA. The DSL grammar section is required — a prompt without it is out-of-distribution. Endpoint also requires temperature 0 + chat_template_kwargs.enable_thinking=false.',
+    prompt: MDMA_AUTHOR_PROMPT_MDMA_IL,
+  },
   {
     id: 'google/gemini-3.1-pro-preview-customtools',
     label: 'Google — Gemini 3.1 Pro Custom Tools (Preview)',
diff --git a/packages/prompt-pack/src/prompts/mdma-fixer/google/gemma.ts b/packages/prompt-pack/src/prompts/mdma-fixer/google/gemma.ts
new file mode 100644
index 0000000..92996e4
--- /dev/null
+++ b/packages/prompt-pack/src/prompts/mdma-fixer/google/gemma.ts
@@ -0,0 +1,47 @@
+/**
+ * MDMA Fixer Prompt — Google Gemma variant (whole family).
+ *
+ * Covers Google's open-weights Gemma models via OpenRouter — Gemma 4
+ * (26B-a4b / 31B) and Gemma 3n (4B). Shares Gemini's Markdown-over-XML
+ * conventions, so it reuses the Gemini `google/_shared.ts` fixer blocks and
+ * the same composition as the smallest Gemini tier
+ * (`gemini-3.1-flash-lite-preview.ts`): the full baseline plus
+ * TABLE_KEY_DIRECTION_BLOCK, since smaller models rename columns instead of
+ * data keys when resolving column/data-key mismatches.
+ *
+ * Routing: substring match on `gemma` covers every Gemma model id
+ * (gemma-4-*, gemma-3n-*, and their `:free` tiers) with a single variant.
+ */
+
+import {
+  MDMA_FIXER_APPROVAL,
+  MDMA_FIXER_BASE,
+  MDMA_FIXER_BINDINGS,
+  MDMA_FIXER_EXAMPLES,
+  MDMA_FIXER_FLOW,
+  MDMA_FIXER_FORMS,
+  MDMA_FIXER_PII,
+  MDMA_FIXER_STRUCTURE,
+  MDMA_FIXER_TABLES_CHARTS,
+} from '../_shared.js';
+import {
+  OUTPUT_FORMAT_BLOCK,
+  PRESERVE_INPUT_STRUCTURE_BLOCK,
+  TABLE_KEY_DIRECTION_BLOCK,
+} from './_shared.js';
+
+export const MDMA_FIXER_PROMPT_GEMMA = `${OUTPUT_FORMAT_BLOCK}
+
+${MDMA_FIXER_BASE}
+
+${MDMA_FIXER_STRUCTURE}
+${MDMA_FIXER_BINDINGS}
+${MDMA_FIXER_PII}
+${MDMA_FIXER_FORMS}
+${MDMA_FIXER_TABLES_CHARTS}
+${TABLE_KEY_DIRECTION_BLOCK}
+${MDMA_FIXER_FLOW}
+${MDMA_FIXER_APPROVAL}
+${MDMA_FIXER_EXAMPLES}
+
+${PRESERVE_INPUT_STRUCTURE_BLOCK}`;
diff --git a/packages/validator/src/types.ts b/packages/validator/src/types.ts
index 616c02e..a41cb95 100644
--- a/packages/validator/src/types.ts
+++ b/packages/validator/src/types.ts
@@ -19,7 +19,8 @@ export type ValidationRuleId =
   | 'field-name-typos'
   | 'expected-components'
   | 'form-submit-action'
-  | 'single-interactive-component';
+  | 'single-interactive-component'
+  | 'html-tags';
 
 export interface ValidationIssue {
   /** Which rule flagged this */
diff --git a/packages/validator/src/validate.ts b/packages/validator/src/validate.ts
index 1852cc9..04f80d9 100644
--- a/packages/validator/src/validate.ts
+++ b/packages/validator/src/validate.ts
@@ -45,6 +45,29 @@ function detectUnfencedComponents(markdown: string): Array<{ type: string; line:
   return results;
 }
 
+/**
+ * Detect HTML/XML-style component tags (e.g. `<thinking …/>`, `<think>`,
+ * `<form>`) that AI models emit instead of ```mdma YAML blocks. These are never
+ * valid MDMA; flagging them lets the fixer rewrite them as fenced components.
+ * Tags inside fenced blocks are ignored; every tag on a line is reported.
+ */
+function detectHtmlComponentTags(markdown: string): Array<{ tag: string; line: number }> {
+  const stripped = markdown.replace(/```[\s\S]*?```/g, (match) =>
+    '\n'.repeat(match.split('\n').length - 1),
+  );
+  const tagNames = ['thinking', 'think', ...KNOWN_TYPES];
+  // Opening tag for a known name, followed by whitespace, `/`, `>`, or EOL
+  // (handles multi-line tags). Global flag: exec() walks all tags on a line.
+  const re = new RegExp(`<(${tagNames.join('|')})(?=[\\s/>]|$)`, 'gi');
+  const results: Array<{ tag: string; line: number }> = [];
+  stripped.split('\n').forEach((text, i) => {
+    for (let m = re.exec(text); m; m = re.exec(text)) {
+      results.push({ tag: m[1].toLowerCase(), line: i + 1 });
+    }
+  });
+  return results;
+}
+
 function buildIdMap(blocks: ParsedBlock[]): Map<string, number> {
   const map = new Map<string, number>();
   for (const block of blocks) {
@@ -71,6 +94,21 @@ export function validate(markdown: string, options: ValidatorOptions = {}): Vali
     fixed: false,
   }));
 
+  // 0b. Detect HTML/XML-style component tags (not valid MDMA). Own rule id
+  // ('html-tags') so consumers can disable it via `exclude` without also
+  // silencing real YAML errors.
+  const htmlTags = exclude.includes('html-tags') ? [] : detectHtmlComponentTags(markdown);
+  for (const t of htmlTags) {
+    preIssues.push({
+      ruleId: 'html-tags' as ValidationRuleId,
+      severity: 'error' as const,
+      message: `Found HTML-style \`<${t.tag}>\` tag at line ${t.line} — this is NOT valid MDMA. DELETE the entire \`<${t.tag} …>\` tag (including all its attributes and any closing tag) from the output. Do NOT keep it. If it carried reasoning, you may instead express that as a single \`\`\`mdma block with \`type: thinking\` — but the raw HTML tag must be gone.`,
+      componentId: null,
+      blockIndex: -1,
+      fixed: false,
+    });
+  }
+
   // 1. Extract and parse all mdma blocks
   const blocks = extractMdmaBlocksFromMarkdown(markdown);
 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ab85411..d6d79c0 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -106,12 +106,28 @@ importers:
       '@mobile-reality/mdma-validator':
         specifier: workspace:*
         version: link:../packages/validator
+      dotenv:
+        specifier: ^16.4.5
+        version: 16.4.5
+      openai:
+        specifier: ^6.0.0
+        version: 6.36.0(ws@8.19.0)(zod@3.25.76)
       promptfoo:
         specifier: 0.121.9
-        version: 0.121.9(@cfworker/json-schema@4.1.1)(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3)))(@types/json-schema@7.0.15)(@types/node@18.19.130)(@types/react@19.2.14)(pg@8.18.0)(playwright-core@1.59.1)(socks@2.8.7)(typescript@5.9.3)
+        version: 0.121.9(@cfworker/json-schema@4.1.1)(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76)))(@types/json-schema@7.0.15)(@types/node@22.19.11)(@types/react@19.2.14)(pg@8.18.0)(playwright-core@1.59.1)(socks@2.8.7)(typescript@5.9.3)
       yaml:
         specifier: ^2.6.0
         version: 2.8.2
+      zod:
+        specifier: ^3.24.0
+        version: 3.25.76
+    devDependencies:
+      '@types/node':
+        specifier: ^22.0.0
+        version: 22.19.11
+      tsx:
+        specifier: ^4.19.0
+        version: 4.21.0
 
   packages/attachables-core:
     dependencies:
@@ -7644,7 +7660,7 @@ snapshots:
       - typescript
     optional: true
 
-  '@ibm-generative-ai/node-sdk@3.2.4(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3)))':
+  '@ibm-generative-ai/node-sdk@3.2.4(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76)))':
     dependencies:
       '@ai-zen/node-fetch-event-source': 2.1.4
       fetch-retry: 5.0.6
@@ -7653,7 +7669,7 @@ snapshots:
       p-queue-compat: 1.0.225
       yaml: 2.8.2
     optionalDependencies:
-      '@langchain/core': 1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3))
+      '@langchain/core': 1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76))
     transitivePeerDependencies:
       - encoding
     optional: true
@@ -7757,41 +7773,41 @@ snapshots:
 
   '@inquirer/ansi@2.0.5': {}
 
-  '@inquirer/checkbox@5.1.4(@types/node@18.19.130)':
+  '@inquirer/checkbox@5.1.4(@types/node@22.19.11)':
     dependencies:
       '@inquirer/ansi': 2.0.5
-      '@inquirer/core': 11.1.9(@types/node@18.19.130)
+      '@inquirer/core': 11.1.9(@types/node@22.19.11)
       '@inquirer/figures': 2.0.5
-      '@inquirer/type': 4.0.5(@types/node@18.19.130)
+      '@inquirer/type': 4.0.5(@types/node@22.19.11)
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
-  '@inquirer/confirm@6.0.12(@types/node@18.19.130)':
+  '@inquirer/confirm@6.0.12(@types/node@22.19.11)':
     dependencies:
-      '@inquirer/core': 11.1.9(@types/node@18.19.130)
-      '@inquirer/type': 4.0.5(@types/node@18.19.130)
+      '@inquirer/core': 11.1.9(@types/node@22.19.11)
+      '@inquirer/type': 4.0.5(@types/node@22.19.11)
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
-  '@inquirer/core@11.1.9(@types/node@18.19.130)':
+  '@inquirer/core@11.1.9(@types/node@22.19.11)':
     dependencies:
       '@inquirer/ansi': 2.0.5
       '@inquirer/figures': 2.0.5
-      '@inquirer/type': 4.0.5(@types/node@18.19.130)
+      '@inquirer/type': 4.0.5(@types/node@22.19.11)
       cli-width: 4.1.0
       fast-wrap-ansi: 0.2.0
       mute-stream: 3.0.0
       signal-exit: 4.1.0
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
-  '@inquirer/editor@5.1.1(@types/node@18.19.130)':
+  '@inquirer/editor@5.1.1(@types/node@22.19.11)':
     dependencies:
-      '@inquirer/core': 11.1.9(@types/node@18.19.130)
-      '@inquirer/external-editor': 3.0.0(@types/node@18.19.130)
-      '@inquirer/type': 4.0.5(@types/node@18.19.130)
+      '@inquirer/core': 11.1.9(@types/node@22.19.11)
+      '@inquirer/external-editor': 3.0.0(@types/node@22.19.11)
+      '@inquirer/type': 4.0.5(@types/node@22.19.11)
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
   '@inquirer/external-editor@1.0.3(@types/node@22.19.11)':
     dependencies:
@@ -7800,34 +7816,34 @@ snapshots:
     optionalDependencies:
       '@types/node': 22.19.11
 
-  '@inquirer/external-editor@3.0.0(@types/node@18.19.130)':
+  '@inquirer/external-editor@3.0.0(@types/node@22.19.11)':
     dependencies:
       chardet: 2.1.1
       iconv-lite: 0.7.2
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
   '@inquirer/figures@2.0.5': {}
 
-  '@inquirer/input@5.0.12(@types/node@18.19.130)':
+  '@inquirer/input@5.0.12(@types/node@22.19.11)':
     dependencies:
-      '@inquirer/core': 11.1.9(@types/node@18.19.130)
-      '@inquirer/type': 4.0.5(@types/node@18.19.130)
+      '@inquirer/core': 11.1.9(@types/node@22.19.11)
+      '@inquirer/type': 4.0.5(@types/node@22.19.11)
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
-  '@inquirer/select@5.1.4(@types/node@18.19.130)':
+  '@inquirer/select@5.1.4(@types/node@22.19.11)':
     dependencies:
       '@inquirer/ansi': 2.0.5
-      '@inquirer/core': 11.1.9(@types/node@18.19.130)
+      '@inquirer/core': 11.1.9(@types/node@22.19.11)
       '@inquirer/figures': 2.0.5
-      '@inquirer/type': 4.0.5(@types/node@18.19.130)
+      '@inquirer/type': 4.0.5(@types/node@22.19.11)
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
-  '@inquirer/type@4.0.5(@types/node@18.19.130)':
+  '@inquirer/type@4.0.5(@types/node@22.19.11)':
     optionalDependencies:
-      '@types/node': 18.19.130
+      '@types/node': 22.19.11
 
   '@isaacs/cliui@8.0.2':
     dependencies:
@@ -7873,14 +7889,14 @@ snapshots:
 
   '@kwsites/promise-deferred@1.1.1': {}
 
-  '@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3))':
+  '@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76))':
     dependencies:
       '@cfworker/json-schema': 4.1.1
       ansi-styles: 5.2.0
       camelcase: 6.3.0
       decamelize: 1.2.0
       js-tiktoken: 1.0.21
-      langsmith: 0.5.6(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3))
+      langsmith: 0.5.6(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76))
       mustache: 4.2.0
       p-queue: 6.6.2
       uuid: 10.0.0
@@ -9720,8 +9736,7 @@ snapshots:
     dependencies:
       path-type: 4.0.0
 
-  dotenv@16.4.5:
-    optional: true
+  dotenv@16.4.5: {}
 
   dotenv@17.4.2: {}
 
@@ -10705,7 +10720,7 @@ snapshots:
       langfuse-core: 3.38.20
     optional: true
 
-  langsmith@0.5.6(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3)):
+  langsmith@0.5.6(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76)):
     dependencies:
       '@types/uuid': 10.0.0
       chalk: 5.6.2
@@ -10716,7 +10731,7 @@ snapshots:
     optionalDependencies:
       '@opentelemetry/api': 1.9.0
       '@opentelemetry/sdk-trace-base': 2.7.1(@opentelemetry/api@1.9.0)
-      openai: 6.36.0(ws@8.19.0)(zod@4.4.3)
+      openai: 6.36.0(ws@8.19.0)(zod@3.25.76)
     optional: true
 
   lightningcss-android-arm64@1.31.1:
@@ -11283,6 +11298,11 @@ snapshots:
       is-inside-container: 1.0.0
       wsl-utils: 0.1.0
 
+  openai@6.36.0(ws@8.19.0)(zod@3.25.76):
+    optionalDependencies:
+      ws: 8.19.0
+      zod: 3.25.76
+
   openai@6.36.0(ws@8.19.0)(zod@4.4.3):
     optionalDependencies:
       ws: 8.19.0
@@ -11562,17 +11582,17 @@ snapshots:
   process-nextick-args@2.0.1:
     optional: true
 
-  promptfoo@0.121.9(@cfworker/json-schema@4.1.1)(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3)))(@types/json-schema@7.0.15)(@types/node@18.19.130)(@types/react@19.2.14)(pg@8.18.0)(playwright-core@1.59.1)(socks@2.8.7)(typescript@5.9.3):
+  promptfoo@0.121.9(@cfworker/json-schema@4.1.1)(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76)))(@types/json-schema@7.0.15)(@types/node@22.19.11)(@types/react@19.2.14)(pg@8.18.0)(playwright-core@1.59.1)(socks@2.8.7)(typescript@5.9.3):
     dependencies:
       '@anthropic-ai/sdk': 0.91.1(zod@4.4.3)
       '@apidevtools/json-schema-ref-parser': 15.3.5(@types/json-schema@7.0.15)
       '@googleapis/sheets': 13.0.1
-      '@inquirer/checkbox': 5.1.4(@types/node@18.19.130)
-      '@inquirer/confirm': 6.0.12(@types/node@18.19.130)
-      '@inquirer/core': 11.1.9(@types/node@18.19.130)
-      '@inquirer/editor': 5.1.1(@types/node@18.19.130)
-      '@inquirer/input': 5.0.12(@types/node@18.19.130)
-      '@inquirer/select': 5.1.4(@types/node@18.19.130)
+      '@inquirer/checkbox': 5.1.4(@types/node@22.19.11)
+      '@inquirer/confirm': 6.0.12(@types/node@22.19.11)
+      '@inquirer/core': 11.1.9(@types/node@22.19.11)
+      '@inquirer/editor': 5.1.1(@types/node@22.19.11)
+      '@inquirer/input': 5.0.12(@types/node@22.19.11)
+      '@inquirer/select': 5.1.4(@types/node@22.19.11)
       '@modelcontextprotocol/sdk': 1.29.0(@cfworker/json-schema@4.1.1)(zod@4.4.3)
       '@openai/agents': 0.8.5(@cfworker/json-schema@4.1.1)(ws@8.19.0)(zod@4.4.3)
       '@opencode-ai/sdk': 1.14.33
@@ -11663,7 +11683,7 @@ snapshots:
       '@fal-ai/client': 1.10.0
       '@huggingface/transformers': 4.2.0
       '@ibm-cloud/watsonx-ai': 1.7.11(@swc/core@1.15.33)(typescript@5.9.3)
-      '@ibm-generative-ai/node-sdk': 3.2.4(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@4.4.3)))
+      '@ibm-generative-ai/node-sdk': 3.2.4(@langchain/core@1.1.27(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.7.1(@opentelemetry/api@1.9.0))(openai@6.36.0(ws@8.19.0)(zod@3.25.76)))
       '@openai/codex-sdk': 0.125.0
       '@playwright/browser-chromium': 1.59.1
       '@rollup/rollup-linux-x64-gnu': 4.60.3