Skip to content

Commit 00a9610

Browse files
authored
fix(colang): apply guardrails transformations to LLM inputs and bot outputs. (#1297)
1 parent 949e422 commit 00a9610

File tree

4 files changed

+98
-6
lines changed

4 files changed

+98
-6
lines changed

CHANGELOG-Colang.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ All notable changes to the Colang language and runtime will be documented in thi
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [2.0-beta.7] - 2025-07-16
8+
9+
### Fixed
10+
11+
* Use the user and bot messages as transformed by the input/output rails, rather than the original unprocessed text, for LLM inputs and bot outputs, to prevent leakage of unfiltered data ([#1297](https://github.com/NVIDIA/NeMo-Guardrails/pull/1297)) by @lapinek
12+
713
## [2.0-beta.6] - 2025-01-16
814

915
### Added

nemoguardrails/colang/v2_x/library/guardrails.co

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,19 @@ flow _bot_say $text
6060
global $output_rails_in_progress
6161

6262
$bot_message = $text
63-
$last_bot_message = $text
6463

6564
# We need to avoid running output rails on messages coming from the output rails themselves.
6665
if not $output_rails_in_progress
6766
await run output rails $text
6867

69-
await UtteranceBotAction(script=$text) as $action
68+
# Use the bot message as (possibly) rewritten by the output rails via $bot_message;
69+
# otherwise, fall back to the original text
70+
if $bot_message is not None
71+
$last_bot_message = $bot_message
72+
else
73+
$last_bot_message = $text
74+
75+
await UtteranceBotAction(script=$last_bot_message) as $action
7076

7177

7278
flow run input rails $input_text

nemoguardrails/colang/v2_x/library/llm.co

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ flow generating user intent for unhandled user utterance
4949
activate polling llm request response
5050
activate tracking bot talking state
5151
global $bot_talking_state
52+
global $user_message
5253

5354
await _user_said_something_unexpected as $user_said
5455
$event = $user_said.event
@@ -60,15 +61,24 @@ flow generating user intent for unhandled user utterance
6061

6162
log 'unexpected user utterance: "{$event.final_transcript}"'
6263
log 'start generating user intent...'
63-
$action = 'user said "{$event.final_transcript}"'
64+
65+
# Use the processed user message if available;
66+
# otherwise, fall back to the original user input
67+
if $user_message is not None
68+
$message_for_llm = $user_message
69+
else
70+
$message_for_llm = $event.final_transcript
71+
72+
$action = 'user said "{$message_for_llm}"'
6473
$intent = await GenerateUserIntentAction(user_action=$action, max_example_flows=20)
6574
log 'generated user intent: {$intent}'
6675

6776
# Generate the 'user intent' by sending out the FinishFlow event
6877
send FinishFlow(flow_id=$intent)
6978

7079
# We need to log the user action
71-
send UserActionLog(flow_id="user said", parameter=$event.final_transcript, intent_flow_id=$intent)
80+
send UserActionLog(flow_id="user said", parameter=$message_for_llm, intent_flow_id=$intent)
81+
7282
# And we also need to log the generated user intent if not done by another mechanism
7383
when UserIntentLog(flow_id=$intent)
7484
return
@@ -84,6 +94,7 @@ flow continuation on unhandled user utterance
8494
activate polling llm request response
8595
activate tracking bot talking state
8696
global $bot_talking_state
97+
global $user_message
8798

8899
await _user_said_something_unexpected as $user_said
89100
$event = $user_said.event
@@ -95,7 +106,15 @@ flow continuation on unhandled user utterance
95106
abort
96107

97108
log 'start generating user intent and bot intent/action...'
98-
$action = 'user said "{$event.final_transcript}"'
109+
110+
# Use the processed user message if available;
111+
# otherwise, fall back to the original user input
112+
if $user_message is not None
113+
$message_for_llm = $user_message
114+
else
115+
$message_for_llm = $event.final_transcript
116+
117+
$action = 'user said "{$message_for_llm}"'
99118

100119

101120
# retrieve relevant chunks from KB if user_message is not empty
@@ -117,7 +136,8 @@ flow continuation on unhandled user utterance
117136
send FinishFlow(flow_id=$user_intent)
118137

119138
# We need to log the user action
120-
send UserActionLog(flow_id="user said", parameter=$event.final_transcript, intent_flow_id=$user_intent)
139+
send UserActionLog(flow_id="user said", parameter=$message_for_llm, intent_flow_id=$user_intent)
140+
121141
# And we also need to log the generated user intent if not done by another mechanism
122142
when UserIntentLog(flow_id=$user_intent)
123143
return
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from nemoguardrails import RailsConfig
17+
from tests.utils import TestChat
18+
19+
yaml_content = """
20+
colang_version: "2.x"
21+
models:
22+
- type: main
23+
engine: openai
24+
model: gpt-4-turbo
25+
"""
26+
27+
28+
def test_1():
29+
"""Test input and output rails transformations."""
30+
31+
colang_content = """
32+
import core
33+
import guardrails
34+
35+
flow input rails $input_text
36+
global $user_message
37+
$user_message = "{$input_text}, Dick"
38+
39+
flow output rails $output_text
40+
global $user_message
41+
global $bot_message
42+
$bot_message = "{$user_message}, and Harry"
43+
44+
flow main
45+
global $last_bot_message
46+
await user said "Tom"
47+
bot say "{$last_bot_message}"
48+
"""
49+
50+
config = RailsConfig.from_content(colang_content, yaml_content)
51+
chat = TestChat(
52+
config,
53+
llm_completions=[],
54+
)
55+
chat >> "Tom"
56+
chat << "Tom, Dick, and Harry"
57+
58+
59+
if __name__ == "__main__":
60+
test_1()

0 commit comments

Comments
 (0)