diff --git a/CHANGELOG-Colang.md b/CHANGELOG-Colang.md
index 5ff1ff619..03b194335 100644
--- a/CHANGELOG-Colang.md
+++ b/CHANGELOG-Colang.md
@@ -4,6 +4,12 @@ All notable changes to the Colang language and runtime will be documented in thi
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.0-beta.7] - 2025-07-16
+
+### Fixed
+
+* Use processed user and bot messages after input/output rails transformations to prevent leakage of unfiltered data ([#1297](https://github.com/NVIDIA/NeMo-Guardrails/pull/1297)) by @lapinek
+
 ## [2.0-beta.6] - 2025-01-16
 
 ### Added
diff --git a/nemoguardrails/colang/v2_x/library/guardrails.co b/nemoguardrails/colang/v2_x/library/guardrails.co
index a591a3925..9f6e1de00 100644
--- a/nemoguardrails/colang/v2_x/library/guardrails.co
+++ b/nemoguardrails/colang/v2_x/library/guardrails.co
@@ -60,13 +60,19 @@ flow _bot_say $text
   global $output_rails_in_progress
 
   $bot_message = $text
-  $last_bot_message = $text
 
   # We need to avoid running output rails on messages coming from the output rails themselves.
   if not $output_rails_in_progress
     await run output rails $text
 
-  await UtteranceBotAction(script=$text) as $action
+  # Use the processed bot message if available;
+  # otherwise, fall back to the original text
+  if $bot_message is not None
+    $last_bot_message = $bot_message
+  else
+    $last_bot_message = $text
+
+  await UtteranceBotAction(script=$last_bot_message) as $action
 
 
 flow run input rails $input_text
diff --git a/nemoguardrails/colang/v2_x/library/llm.co b/nemoguardrails/colang/v2_x/library/llm.co
index ee1c53cf9..e80456525 100644
--- a/nemoguardrails/colang/v2_x/library/llm.co
+++ b/nemoguardrails/colang/v2_x/library/llm.co
@@ -49,6 +49,7 @@ flow generating user intent for unhandled user utterance
   activate polling llm request response
   activate tracking bot talking state
   global $bot_talking_state
+  global $user_message
 
   await _user_said_something_unexpected as $user_said
   $event = $user_said.event
@@ -60,7 +61,15 @@ flow generating user intent for unhandled user utterance
     log 'unexpected user utterance: "{$event.final_transcript}"'
 
   log 'start generating user intent...'
-  $action = 'user said "{$event.final_transcript}"'
+
+  # Use the processed user message if available;
+  # otherwise, fall back to the original user input
+  if $user_message is not None
+    $message_for_llm = $user_message
+  else
+    $message_for_llm = $event.final_transcript
+
+  $action = 'user said "{$message_for_llm}"'
   $intent = await GenerateUserIntentAction(user_action=$action, max_example_flows=20)
   log 'generated user intent: {$intent}'
 
@@ -68,7 +77,8 @@ flow generating user intent for unhandled user utterance
   send FinishFlow(flow_id=$intent)
 
   # We need to log the user action
-  send UserActionLog(flow_id="user said", parameter=$event.final_transcript, intent_flow_id=$intent)
+  send UserActionLog(flow_id="user said", parameter=$message_for_llm, intent_flow_id=$intent)
+
   # And we also need to log the generated user intent if not done by another mechanism
   when UserIntentLog(flow_id=$intent)
     return
@@ -84,6 +94,7 @@ flow continuation on unhandled user utterance
   activate polling llm request response
   activate tracking bot talking state
   global $bot_talking_state
+  global $user_message
 
   await _user_said_something_unexpected as $user_said
   $event = $user_said.event
@@ -95,7 +106,15 @@ flow continuation on unhandled user utterance
     abort
 
   log 'start generating user intent and bot intent/action...'
-  $action = 'user said "{$event.final_transcript}"'
+
+  # Use the processed user message if available;
+  # otherwise, fall back to the original user input
+  if $user_message is not None
+    $message_for_llm = $user_message
+  else
+    $message_for_llm = $event.final_transcript
+
+  $action = 'user said "{$message_for_llm}"'
 
   # retrieve relevant chunks from KB if user_message is not empty
 
@@ -117,7 +136,8 @@ flow continuation on unhandled user utterance
   send FinishFlow(flow_id=$user_intent)
 
   # We need to log the user action
-  send UserActionLog(flow_id="user said", parameter=$event.final_transcript, intent_flow_id=$user_intent)
+  send UserActionLog(flow_id="user said", parameter=$message_for_llm, intent_flow_id=$user_intent)
+
   # And we also need to log the generated user intent if not done by another mechanism
   when UserIntentLog(flow_id=$user_intent)
     return
diff --git a/tests/v2_x/test_input_output_rails_transformations.py b/tests/v2_x/test_input_output_rails_transformations.py
new file mode 100644
index 000000000..181db19bf
--- /dev/null
+++ b/tests/v2_x/test_input_output_rails_transformations.py
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from nemoguardrails import RailsConfig
+from tests.utils import TestChat
+
+yaml_content = """
+colang_version: "2.x"
+models:
+  - type: main
+    engine: openai
+    model: gpt-4-turbo
+"""
+
+
+def test_1():
+    """Test input and output rails transformations."""
+
+    colang_content = """
+    import core
+    import guardrails
+
+    flow input rails $input_text
+      global $user_message
+      $user_message = "{$input_text}, Dick"
+
+    flow output rails $output_text
+      global $user_message
+      global $bot_message
+      $bot_message = "{$user_message}, and Harry"
+
+    flow main
+      global $last_bot_message
+      await user said "Tom"
+      bot say "{$last_bot_message}"
+    """
+
+    config = RailsConfig.from_content(colang_content, yaml_content)
+    chat = TestChat(
+        config,
+        llm_completions=[],
+    )
+    chat >> "Tom"
+    chat << "Tom, Dick, and Harry"
+
+
+if __name__ == "__main__":
+    test_1()