2 changes: 2 additions & 0 deletions pyrit/datasets/__init__.py
@@ -33,6 +33,7 @@
    fetch_jbb_behaviors_by_harm_category,
    fetch_jbb_behaviors_by_jbb_category,
)
from pyrit.datasets.fetch_jailbreakv_28k_dataset import fetch_jailbreakv_28k_dataset


__all__ = [
@@ -64,4 +65,5 @@
"fetch_jbb_behaviors_dataset",
"fetch_jbb_behaviors_by_harm_category",
"fetch_jbb_behaviors_by_jbb_category",
"fetch_jailbreakv_28k_dataset",
Contributor:

mind keeping these alphabetical? I realize we missed out on that before but no better time to fix it than now 🙂

]
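
(Responding to the ordering comment above: a sketch of how the visible tail of __all__ would read once alphabetized, assuming plain lexicographic order; entries outside this hunk are elided.)

    "fetch_jailbreakv_28k_dataset",
    "fetch_jbb_behaviors_by_harm_category",
    "fetch_jbb_behaviors_by_jbb_category",
    "fetch_jbb_behaviors_dataset",
]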
200 changes: 200 additions & 0 deletions pyrit/datasets/fetch_jailbreakv_28k_dataset.py
@@ -0,0 +1,200 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import uuid
from typing import Dict, List, Literal, Optional

from datasets import load_dataset
from huggingface_hub import HfApi, hf_hub_download

from pyrit.models import SeedPrompt, SeedPromptDataset

logger = logging.getLogger(__name__)

HarmLiteral = Literal[
"Unethical Behavior",
"Economic Harm",
"Hate Speech",
"Government Decision",
"Physical Harm",
"Fraud",
"Political Sensitivity",
"Malware",
"Illegal Activity",
"Bias",
"Violence",
"Animal Abuse",
"Tailored Unlicensed Advice",
"Privacy Violation",
"Health Consultation",
"Child Abuse Content",
]


def fetch_jailbreakv_28k_dataset(
    *,
    data_home: Optional[str] = None,
    split: Literal["JailBreakV_28K", "mini_JailBreakV_28K"] = "mini_JailBreakV_28K",
    text_field: Literal["jailbreak_query", "redteam_query"] = "redteam_query",
    harm_categories: Optional[List[HarmLiteral]] = None,
) -> SeedPromptDataset:
    """
    Fetch examples from the JailBreakV-28K dataset with optional filtering and create a SeedPromptDataset.

    Args:
        data_home: Directory used as cache_dir in the call to HF to store cached data. Defaults to None.
        split (str): The split of the dataset to fetch. Defaults to "mini_JailBreakV_28K".
            Options are "JailBreakV_28K" and "mini_JailBreakV_28K".
        text_field (str): The field to use as the prompt text. Defaults to "redteam_query".
            Options are "jailbreak_query" and "redteam_query".
        harm_categories: List of harm categories to filter the examples.
            Defaults to None, which means all categories are included.
            Otherwise, only prompts with at least one matching category are included.

    Returns:
        SeedPromptDataset: A SeedPromptDataset containing the filtered examples.

    Note:
        For more information and access to the original dataset and related materials, visit:
        https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k/blob/main/README.md \n
        Related paper: https://arxiv.org/abs/2404.03027 \n
        The dataset license: mit
        authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo
Comment on lines +62 to +63
Contributor:
Suggested change:
-        The dataset license: mit
-        authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo
+        The dataset license: MIT
+        Authors: Weidi Luo, Siyuan Ma, Xiaogeng Liu, Chaowei Xiao, Xiaoyu Guo

    Warning:
        Due to the nature of these prompts, it may be advisable to consult your relevant legal
        department before testing them with LLMs to ensure compliance and reduce potential risks.
    """

    source = "JailbreakV-28K/JailBreakV-28k"

    try:
        logger.info(f"Loading JailBreakV-28k dataset from {source}")

        # Normalize the harm categories to match pyrit harm category conventions
        harm_categories_normalized = (
            None if not harm_categories else [_normalize_policy(policy) for policy in harm_categories]
        )

        # Load the dataset from HuggingFace
        data = load_dataset(source, "JailBreakV_28K", cache_dir=data_home)

        dataset_split = data[split]

        per_call_cache: Dict[str, str] = {}

        seed_prompts = []

        # Define common metadata that will be used across all seed prompts
        common_metadata = {
            "dataset_name": "JailbreakV-28K",
            "authors": ["Weidi Luo", "Siyuan Ma", "Xiaogeng Liu", "Chaowei Xiao", "Xiaoyu Guo"],
            "description": (
                "Benchmark for Assessing the Robustness of "
                "Multimodal Large Language Models against Jailbreak Attacks. "
            ),
            "groups": ["The Ohio State University", "Peking University", "University of Wisconsin-Madison"],
            "source": "https://huggingface.co/datasets/JailbreakV-28K/JailBreakV-28k",
            "name": "JailBreakV-28K",
        }

        # Tracker for items in the dataset where image_path does not match an image in the repo
        missing_images = 0

        for item in dataset_split:
            policy = _normalize_policy(item.get("policy", ""))
            # Skip if the user requested a policy filter and the item's policy does not match
            if not harm_categories_normalized or policy in harm_categories_normalized:
                image_rel_path = item.get("image_path", "")
                image_abs_path = ""
                if image_rel_path:
                    image_abs_path = _resolve_image_path(
                        image_rel_path, repo_id=source, data_home=data_home, call_cache=per_call_cache
                    )
                if image_abs_path:
                    group_id = uuid.uuid4()
                    text_seed_prompt = SeedPrompt(
                        value=item.get(text_field, ""),
                        harm_categories=[policy],
                        prompt_group_id=group_id,
                        data_type="text",
                        **common_metadata,  # type: ignore[arg-type]
                    )
                    image_seed_prompt = SeedPrompt(
                        value=image_abs_path,
                        harm_categories=[policy],
                        prompt_group_id=group_id,
                        data_type="image_path",
                        **common_metadata,  # type: ignore[arg-type]
                    )
                    seed_prompts.append(text_seed_prompt)
                    seed_prompts.append(image_seed_prompt)
                else:
                    missing_images += 1
Comment on lines +115 to +134
Contributor:

Suggested change:
-                if image_abs_path:
-                    group_id = uuid.uuid4()
-                    text_seed_prompt = SeedPrompt(
-                        value=item.get(text_field, ""),
-                        harm_categories=[policy],
-                        prompt_group_id=group_id,
-                        data_type="text",
-                        **common_metadata,  # type: ignore[arg-type]
-                    )
-                    image_seed_prompt = SeedPrompt(
-                        value=image_abs_path,
-                        harm_categories=[policy],
-                        prompt_group_id=group_id,
-                        data_type="image_path",
-                        **common_metadata,  # type: ignore[arg-type]
-                    )
-                    seed_prompts.append(text_seed_prompt)
-                    seed_prompts.append(image_seed_prompt)
-                else:
-                    missing_images += 1
+                if not image_abs_path:
+                    missing_images += 1
+                    continue
+                group_id = uuid.uuid4()
+                text_seed_prompt = SeedPrompt(
+                    value=item.get(text_field, ""),
+                    harm_categories=[policy],
+                    prompt_group_id=group_id,
+                    data_type="text",
+                    **common_metadata,  # type: ignore[arg-type]
+                )
+                image_seed_prompt = SeedPrompt(
+                    value=image_abs_path,
+                    harm_categories=[policy],
+                    prompt_group_id=group_id,
+                    data_type="image_path",
+                    **common_metadata,  # type: ignore[arg-type]
+                )
+                seed_prompts.append(text_seed_prompt)
+                seed_prompts.append(image_seed_prompt)

i.e., move the second case to first and remove indentation

    except Exception as e:
        logger.error(f"Failed to load JailBreakV-28K dataset: {str(e)}")
        raise Exception(f"Error loading JailBreakV-28K dataset: {str(e)}")

    if missing_images:
        logger.warning(f"Failed to resolve {missing_images} image paths in JailBreakV-28K dataset")

    if not seed_prompts:
        raise ValueError(
            "JailBreakV-28K fetch produced 0 prompts. "
            "Likely caused by all items returned after filtering having invalid image paths."
        )
Comment on lines +140 to +144
Contributor:
how many are currently missing? Should we have a cutoff (>0) at which point it should error out?

Contributor (Author):

Currently, the vast majority are missing, making this dataset not as useful as previously expected. I have started a discussion on HF about adding the full images folder, which is currently only available as a zip file held in a separate cloud drive.
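
(A minimal sketch of what such a cutoff could look like; _check_missing_cutoff and max_missing_ratio are hypothetical names, not part of this PR:)

    def _check_missing_cutoff(missing: int, total: int, max_missing_ratio: float = 0.5) -> None:
        """Hypothetical guard: raise once too many items lack a resolvable image."""
        if total and missing / total > max_missing_ratio:
            raise ValueError(
                f"{missing}/{total} JailBreakV-28K items had unresolvable image paths "
                f"(above the {max_missing_ratio:.0%} cutoff); refusing to return a skewed dataset."
            )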

    seed_prompt_dataset = SeedPromptDataset(prompts=seed_prompts)
    return seed_prompt_dataset


def _normalize_policy(policy: str) -> str:
    """Normalize a policy string to a machine-friendly form (lowercased, spaces and hyphens replaced with underscores)."""
    return policy.strip().lower().replace(" ", "_").replace("-", "_")


def _resolve_image_path(
    rel_path: str,
    repo_id: str,
    data_home: Optional[str],
    call_cache: Dict[str, str] = {},
) -> str:
    """
    Resolve a repo-relative image path to a local absolute path using hf_hub_download.
    Uses a cache (the shared default dict, or one supplied per call) to avoid re-downloading the same file.

    Args:
        rel_path: Path relative to the dataset repository root (e.g., "images/0001.png").
        repo_id: HF dataset repo id, e.g., "JailbreakV-28K/JailBreakV-28k".
        data_home: Optional cache directory.
        call_cache: Optional dict to use instead of the shared default cache.

    Returns:
        Absolute local path if resolved, else an empty string (and the miss is cached).
    """
    if not rel_path:
        return ""

    # Check whether the image has already been cached
    if rel_path in call_cache:
        return call_cache[rel_path]

    path_root = "JailBreakV_28K"
    hf_path = f"{path_root}/{rel_path}"
    try:
        # First check that the path exists using HfApi()
        repo_file_list = HfApi().list_repo_files(repo_id=repo_id, repo_type="dataset")
        if hf_path not in repo_file_list:
            logger.debug(f"File {hf_path} not found in dataset {repo_id}")
            call_cache[rel_path] = ""
            return ""
        # Download the image
        abs_path = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=hf_path,
            cache_dir=data_home,
        )
        call_cache[rel_path] = abs_path
        return abs_path
    except Exception as e:
        logger.error(f"Failed to download image {rel_path}: {str(e)}")
        call_cache[rel_path] = ""
        return ""
27 changes: 27 additions & 0 deletions tests/integration/datasets/test_fetch_datasets.py
@@ -13,6 +13,7 @@
    fetch_equitymedqa_dataset_unique_values,
    fetch_forbidden_questions_dataset,
    fetch_harmbench_dataset,
    fetch_jailbreakv_28k_dataset,
    fetch_jbb_behaviors_by_harm_category,
    fetch_jbb_behaviors_by_jbb_category,
    fetch_jbb_behaviors_dataset,
@@ -46,6 +47,7 @@
    (fetch_equitymedqa_dataset_unique_values, True),
    (fetch_forbidden_questions_dataset, True),
    (fetch_harmbench_dataset, True),
    (fetch_jailbreakv_28k_dataset, True),
    (fetch_jbb_behaviors_dataset, True),
    (fetch_librAI_do_not_answer_dataset, True),
    (fetch_llm_latent_adversarial_training_harmful_dataset, True),
@@ -94,3 +96,28 @@ def test_fetch_jbb_behaviors_by_jbb_category():
        assert len(hate_prompts.prompts) > 0
    except Exception as e:
        pytest.skip(f"Integration test skipped due to: {e}")


def test_fetch_jailbreakv_28k_dataset():
    """Integration test for fetching jailbreakv_28k dataset with real data."""
    try:
        jailbreakv_28k = fetch_jailbreakv_28k_dataset()
        assert isinstance(jailbreakv_28k, SeedPromptDataset)
        assert len(jailbreakv_28k.prompts) > 0
        assert sum(p.data_type == "text" for p in jailbreakv_28k.prompts) == len(jailbreakv_28k.prompts) / 2
        assert sum(p.data_type == "image_path" for p in jailbreakv_28k.prompts) == len(jailbreakv_28k.prompts) / 2
    except Exception as e:
        pytest.skip(f"Integration test skipped due to: {e}")
Contributor:
skip? why not fail?

Contributor (Author):
Good point. I'm not sure why; I was simply going off what I thought was the convention, based on previously merged custom integration tests (the jbb dataset integration).
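
(For reference, a hedged sketch of the fail-instead-of-skip variant discussed above; it reuses this file's existing imports:)

    def test_fetch_jailbreakv_28k_dataset():
        try:
            jailbreakv_28k = fetch_jailbreakv_28k_dataset()
        except Exception as e:
            # Fail loudly instead of skipping, so fetch regressions surface in CI
            pytest.fail(f"Fetching the JailBreakV-28K dataset failed: {e}")
        assert isinstance(jailbreakv_28k, SeedPromptDataset)
        assert len(jailbreakv_28k.prompts) > 0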



def test_fetch_jailbreakv_28k_dataset_by_harm_category():
    """Integration test for filtering jailbreakv_28k by harm category with real data."""
    try:
        # Filter for a category whose items have a valid image_path
        jailbreakv_28k = fetch_jailbreakv_28k_dataset(harm_categories=["Economic Harm"])
        assert isinstance(jailbreakv_28k, SeedPromptDataset)
        assert len(jailbreakv_28k.prompts) > 0
        assert sum(p.data_type == "text" for p in jailbreakv_28k.prompts) == len(jailbreakv_28k.prompts) / 2
        assert sum(p.data_type == "image_path" for p in jailbreakv_28k.prompts) == len(jailbreakv_28k.prompts) / 2
    except Exception as e:
        pytest.skip(f"Integration test skipped due to: {e}")
88 changes: 88 additions & 0 deletions tests/unit/datasets/test_fetch_jailbreakv_28k.py
@@ -0,0 +1,88 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from contextlib import nullcontext
from unittest.mock import patch

import pytest

from pyrit.datasets.fetch_jailbreakv_28k_dataset import fetch_jailbreakv_28k_dataset
from pyrit.models import SeedPrompt, SeedPromptDataset


class TestFetchJailbreakv28kDataset:
    """Test suite for the fetch_jailbreakv_28k_dataset function."""

    @pytest.mark.parametrize("text_field", [None, "jailbreak_query"])
    @pytest.mark.parametrize(
        "harm_categories",
        [None, ["Economic Harm"], ["Government Decision"]],
    )
    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset._resolve_image_path")
    @patch("pyrit.datasets.fetch_jailbreakv_28k_dataset.load_dataset")
    def test_fetch_jailbreakv_28k_dataset_success(
        self, mock_load_dataset, mock_resolve_image_path, text_field, harm_categories
    ):
        # Mock dataset response
        mock_dataset = {
            "mini_JailBreakV_28K": [
                {
                    "redteam_query": "test query 1",
                    "jailbreak_query": "jailbreak: test query 1",
                    "policy": "Economic Harm",
                    "image_path": "mock_folder/valid",
                },
                {
                    "redteam_query": "test query 2",
                    "jailbreak_query": "jailbreak: test query 2",
                    "policy": "Government Decision",
                    "image_path": "invalid",
                },
                {
                    "redteam_query": "test query 3",
                    "jailbreak_query": "jailbreak: test query 3",
                    "policy": "Fraud",
                    "image_path": "mock_folder/valid",
                },
            ]
        }
        mock_load_dataset.return_value = mock_dataset

        def fake_resolve_image_path(rel_path: str, **kwargs) -> str:
            return "" if rel_path == "invalid" else f"mock_path/{rel_path}"

        mock_resolve_image_path.side_effect = fake_resolve_image_path

        # Expect a ValueError only for the "Government Decision" filter: its sole item
        # has an invalid image path, so filtering yields zero prompts.
        expect_error = harm_categories == ["Government Decision"]
        ctx = pytest.raises(ValueError) if expect_error else nullcontext()

        # Single call
        with ctx:
            result = fetch_jailbreakv_28k_dataset(text_field=text_field, harm_categories=harm_categories)
        if expect_error:
            return

        # Assertions
        assert isinstance(result, SeedPromptDataset)
        if harm_categories is None:
            assert len(result.prompts) == 4
            assert sum(p.data_type == "text" for p in result.prompts) == 2
            assert sum(p.data_type == "image_path" for p in result.prompts) == 2
        elif harm_categories == ["Economic Harm"]:
            assert len(result.prompts) == 2
            assert sum(p.data_type == "text" for p in result.prompts) == 1
            assert sum(p.data_type == "image_path" for p in result.prompts) == 1
            assert result.prompts[0].harm_categories == ["economic_harm"]
        assert all(isinstance(prompt, SeedPrompt) for prompt in result.prompts)
        if text_field == "jailbreak_query":
            for prompt in result.prompts:
                if prompt.data_type == "text":
                    assert "jailbreak" in prompt.value
        else:
            for prompt in result.prompts:
                if prompt.data_type == "text":
                    assert "jailbreak" not in prompt.value