Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions nemoguardrails/evaluate/cli/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ def moderation(
),
write_outputs: bool = typer.Option(True, help="Write outputs to file"),
split: str = typer.Option("harmful", help="Whether prompts are harmful or helpful"),
enable_translation: bool = typer.Option(
False, help="Enable translation functionality"
),
translation_config: str = typer.Option(
"nemoguardrails/evaluate/langproviders/configs/translation.yaml",
help="Path to translation configuration file",
),
):
"""
Evaluate the performance of the moderation rails defined in a Guardrails application.
Expand All @@ -150,6 +157,8 @@ def moderation(
Defaults to "eval_outputs/moderation".
write_outputs (bool): Write outputs to file. Defaults to True.
split (str): Whether prompts are harmful or helpful. Defaults to "harmful".
enable_translation (bool): Enable translation functionality. Defaults to False.
translation_config (str): Path to translation configuration file. Defaults to "nemoguardrails/evaluate/langproviders/configs/translation.yaml".
"""
moderation_check = ModerationRailsEvaluation(
config,
Expand All @@ -160,6 +169,8 @@ def moderation(
output_dir,
write_outputs,
split,
enable_translation,
translation_config,
)
typer.echo(f"Starting the moderation evaluation for data: {dataset_path} ...")
moderation_check.run()
Expand All @@ -178,6 +189,13 @@ def hallucination(
"eval_outputs/hallucination", help="Output directory"
),
write_outputs: bool = typer.Option(True, help="Write outputs to file"),
enable_translation: bool = typer.Option(
False, help="Enable translation functionality"
),
translation_config: str = typer.Option(
"nemoguardrails/evaluate/langproviders/configs/translation.yaml",
help="Path to translation configuration file",
),
):
"""
Evaluate the performance of the hallucination rails defined in a Guardrails application.
Expand All @@ -190,13 +208,17 @@ def hallucination(
num_samples (int): Number of samples to evaluate. Defaults to 50.
output_dir (str): Output directory. Defaults to "eval_outputs/hallucination".
write_outputs (bool): Write outputs to file. Defaults to True.
enable_translation (bool): Enable translation functionality. Defaults to False.
translation_config (str): Path to translation configuration file. Defaults to "nemoguardrails/evaluate/langproviders/configs/translation.yaml".
"""
hallucination_check = HallucinationRailsEvaluation(
config,
dataset_path,
num_samples,
output_dir,
write_outputs,
enable_translation,
translation_config,
)
typer.echo(f"Starting the hallucination evaluation for data: {dataset_path} ...")
hallucination_check.run()
Expand Down
55 changes: 51 additions & 4 deletions nemoguardrails/evaluate/evaluate_hallucination.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import json
import logging
import os
Expand All @@ -22,6 +23,7 @@
import typer

from nemoguardrails import LLMRails
from nemoguardrails.actions.llm.utils import llm_call
from nemoguardrails.evaluate.utils import load_dataset
from nemoguardrails.llm.params import llm_params
from nemoguardrails.llm.prompts import Task
Expand All @@ -40,6 +42,8 @@ def __init__(
num_samples: int = 50,
output_dir: str = "outputs/hallucination",
write_outputs: bool = True,
enable_translation: bool = False,
translation_config: str = None,
):
"""
A hallucination rails evaluation has the following parameters:
Expand All @@ -50,6 +54,8 @@ def __init__(
- num_samples: number of samples to evaluate
- output_dir: directory to write the hallucination predictions
- write_outputs: whether to write the predictions to file
- enable_translation: whether to enable translation functionality
- translation_config: path to translation configuration file
"""

self.config_path = config
Expand All @@ -60,7 +66,34 @@ def __init__(
self.llm_task_manager = LLMTaskManager(self.rails_config)

self.num_samples = num_samples
self.dataset = load_dataset(self.dataset_path)[: self.num_samples]
self.enable_translation = enable_translation
self.translation_config = translation_config

# Initialize translation provider if enabled
self.translator = None
self.translate_to = None
if self.enable_translation:
try:
from nemoguardrails.evaluate.utils import (
_extract_target_language,
_load_langprovider,
)

self.translator = _load_langprovider(self.translation_config)
self.translate_to = _extract_target_language(self.translation_config)
print(f"✓ Translation provider initialized for {self.translate_to}")
except Exception as e:
print(f"⚠ Translation provider not available: {e}")
self.enable_translation = False

# Load dataset with optional translation
if self.enable_translation and self.translator:
self.dataset = load_dataset(
self.dataset_path, translation_config=self.translation_config
)[: self.num_samples]
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should print a warning if translation is enabled but the translator is None.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

self.dataset = load_dataset(self.dataset_path)[: self.num_samples]

self.write_outputs = write_outputs
self.output_dir = output_dir

Expand All @@ -71,7 +104,7 @@ def get_response_with_retries(self, prompt, max_tries=1):
num_tries = 0
while num_tries < max_tries:
try:
response = self.llm(prompt)
response = asyncio.run(llm_call(prompt=prompt, llm=self.llm))
return response
except:
num_tries += 1
Expand Down Expand Up @@ -153,7 +186,9 @@ def self_check_hallucination(self):
Task.SELF_CHECK_HALLUCINATION,
{"paragraph": paragraph, "statement": bot_response},
)
hallucination = self.llm(hallucination_check_prompt)
hallucination = asyncio.run(
llm_call(prompt=hallucination_check_prompt, llm=self.llm)
)
hallucination = hallucination.lower().strip()

prediction = {
Expand Down Expand Up @@ -194,7 +229,9 @@ def run(self):
f"{self.output_dir}/{dataset_name}_hallucination_predictions.json"
)
with open(output_path, "w") as f:
json.dump(hallucination_check_predictions, f, indent=4)
json.dump(
hallucination_check_predictions, f, indent=4, ensure_ascii=False
)
print(f"Predictions written to file {output_path}.json")


Expand All @@ -204,6 +241,12 @@ def main(
num_samples: int = typer.Option(50, help="Number of samples to evaluate"),
output_dir: str = typer.Option("outputs/hallucination", help="Output directory"),
write_outputs: bool = typer.Option(True, help="Write outputs to file"),
enable_translation: bool = typer.Option(
False, help="Enable translation functionality"
),
translation_config: str = typer.Option(
None, help="Path to translation configuration file"
),
):
"""
Main function to run the hallucination rails evaluation.
Expand All @@ -214,13 +257,17 @@ def main(
num_samples (int): Number of samples to evaluate.
output_dir (str): Output directory for predictions.
write_outputs (bool): Whether to write the predictions to a file.
enable_translation (bool): Whether to enable translation functionality.
translation_config (str): Path to translation configuration file.
"""
hallucination_check = HallucinationRailsEvaluation(
config,
data_path,
num_samples,
output_dir,
write_outputs,
enable_translation,
translation_config,
)
hallucination_check.run()

Expand Down
30 changes: 28 additions & 2 deletions nemoguardrails/evaluate/evaluate_moderation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(
output_dir: str = "outputs/moderation",
write_outputs: bool = True,
split: str = "harmful",
enable_translation: bool = False,
translation_config: str = None,
):
"""
A moderation rails evaluation has the following parameters:
Expand All @@ -54,6 +56,8 @@ def __init__(
- output_dir: directory to write the moderation predictions
- write_outputs: whether to write the predictions to file
- split: whether the dataset is harmful or helpful
- enable_translation: whether to enable translation functionality
- translation_config: path to translation configuration file
"""

self.config_path = config
Expand All @@ -67,7 +71,29 @@ def __init__(
self.check_output = check_output

self.num_samples = num_samples
self.dataset = load_dataset(self.dataset_path)[: self.num_samples]
self.enable_translation = enable_translation
self.translation_config = translation_config

# Initialize translation provider if enabled
self.translator = None
if self.enable_translation:
try:
from nemoguardrails.evaluate.utils_translate import _load_langprovider

self.translator = _load_langprovider(self.translation_config)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is done again in load_dataset. Can't we do it only once there?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I'll try it.

print(f"✓ Translation provider initialized")
except Exception as e:
print(f"⚠ Translation provider not available: {e}")
self.enable_translation = False

# Load dataset with optional translation
if self.enable_translation and self.translator:
self.dataset = load_dataset(
self.dataset_path, translation_config=self.translation_config
)[: self.num_samples]
else:
self.dataset = load_dataset(self.dataset_path)[: self.num_samples]

self.split = split
self.write_outputs = write_outputs
self.output_dir = output_dir
Expand Down Expand Up @@ -266,6 +292,6 @@ def run(self):
)

with open(output_path, "w") as f:
json.dump(moderation_check_predictions, f, indent=4)
json.dump(moderation_check_predictions, f, indent=4, ensure_ascii=False)

print(f"Predictions written to file {output_path}")
Loading
Loading