diff --git a/examples/configs/sensitive_data_detection_v2/README.md b/examples/configs/sensitive_data_detection_v2/README.md new file mode 100644 index 000000000..f2b00276c --- /dev/null +++ b/examples/configs/sensitive_data_detection_v2/README.md @@ -0,0 +1,41 @@ +# Presidio-based Sensitive Data Detection Example + +This example demonstrates how to detect and redact sensitive data using [Presidio](https://github.com/Microsoft/presidio). + +## Prerequisites + +- `Presidio` + + You can install it with: + + ```bash + poetry run pip install presidio-analyzer presidio-anonymizer + ``` + + > **Note** + > + > Presidio may come with an unsupported version of `numpy`. To reinstall the supported version, run: + > ```bash + > poetry install + > ``` + +- `en_core_web_lg` spaCy model + + You can download it with: + + ```bash + poetry run python -m spacy download en_core_web_lg + ``` + +## Running example + +To test this configuration, run the CLI chat from the `examples/configs/sensitive_data_detection_v2` directory: + +```bash +poetry run nemoguardrails chat --config=. 
+``` + +## Documentation + +- [Presidio-based Sensitive Data Detection configuration](../../../docs/user-guides/guardrails-library.md#presidio-based-sensitive-data-detection) +- [Presidio Integration guide](../../../docs/user-guides/community/presidio.md) diff --git a/examples/configs/sensitive_data_detection_v2/config.yml b/examples/configs/sensitive_data_detection_v2/config.yml new file mode 100644 index 000000000..8c042ff45 --- /dev/null +++ b/examples/configs/sensitive_data_detection_v2/config.yml @@ -0,0 +1,29 @@ +colang_version: "2.x" + +models: + - type: main + engine: openai + model: gpt-4o-mini + +rails: + config: + sensitive_data_detection: + input: + score_threshold: 0.4 + entities: + - PERSON + - EMAIL_ADDRESS + - PHONE_NUMBER + - CREDIT_CARD + - US_SSN + - LOCATION + + output: + score_threshold: 0.4 + entities: + - PERSON + - EMAIL_ADDRESS + - PHONE_NUMBER + - CREDIT_CARD + - US_SSN + - LOCATION diff --git a/examples/configs/sensitive_data_detection_v2/flows.co b/examples/configs/sensitive_data_detection_v2/flows.co new file mode 100644 index 000000000..1f542538e --- /dev/null +++ b/examples/configs/sensitive_data_detection_v2/flows.co @@ -0,0 +1,10 @@ +import guardrails +import nemoguardrails.library.sensitive_data_detection + +flow input rails $input_text + """Mask sensitive data in user utterances before they get further processed.""" + await mask sensitive data on input + +flow output rails $output_text + """Mask sensitive data in the bot response before sending it to the user.""" + await mask sensitive data on output diff --git a/examples/configs/sensitive_data_detection_v2/main.co b/examples/configs/sensitive_data_detection_v2/main.co new file mode 100644 index 000000000..e95376eab --- /dev/null +++ b/examples/configs/sensitive_data_detection_v2/main.co @@ -0,0 +1,5 @@ +import core +import llm + +flow main + activate llm continuation diff --git a/nemoguardrails/library/sensitive_data_detection/flows.co b/nemoguardrails/library/sensitive_data_detection/flows.co index f2e93438a..213c0204b
100644 --- a/nemoguardrails/library/sensitive_data_detection/flows.co +++ b/nemoguardrails/library/sensitive_data_detection/flows.co @@ -11,6 +11,7 @@ flow detect sensitive data on input flow mask sensitive data on input """Mask any sensitive data found in the user input.""" + global $user_message $user_message = await MaskSensitiveDataAction(source="input", text=$user_message) @@ -28,10 +29,11 @@ flow detect sensitive data on output flow mask sensitive data on output """Mask any sensitive data found in the bot output.""" + global $bot_message $bot_message = await MaskSensitiveDataAction(source="output", text=$bot_message) -# RETRIVAL RAILS +# RETRIEVAL RAILS flow detect sensitive data on retrieval @@ -45,4 +47,5 @@ flow detect sensitive data on retrieval flow mask sensitive data on retrieval """Mask any sensitive data found in the relevant chunks from the knowledge base.""" + global $relevant_chunks $relevant_chunks = await MaskSensitiveDataAction(source="retrieval", text=$relevant_chunks)