Skip to content

adding a fiddlecube red-teaming provider #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion llama_stack/providers/registry/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from typing import List

from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec, ProviderSpec, remote_provider_spec


def available_providers() -> List[ProviderSpec]:
Expand All @@ -25,4 +25,12 @@ def available_providers() -> List[ProviderSpec]:
Api.agents,
],
),
remote_provider_spec(
api=Api.eval,
adapter=AdapterSpec(
adapter_type="fiddlecube-red-teaming",
module="llama_stack.providers.remote.eval.fiddlecube",
pip_packages=[],
),
),
]
47 changes: 47 additions & 0 deletions llama_stack/providers/remote/eval/fiddlecube/fiddlecube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import Any, Dict, List
from llama_stack.apis.common.job_types import Job, JobStatus
from llama_stack.apis.eval import Eval
from llama_stack.apis.eval.eval import EvalTaskConfig, EvaluateResponse


class FiddleCubeRedTeamingAdapter(Eval):
def __init__(self):
    """Create the adapter with an empty in-memory job table."""
    # Keyed by job id; job_status() tests membership in this mapping and
    # job_result() returns the value stored under the id.
    self.jobs = dict()

async def run_eval(self, eval_id: str, eval_config: EvalTaskConfig) -> Job:
    """Start a FiddleCube red-teaming run and return a handle to it.

    NOTE: this is still a stub. It neither calls FiddleCube nor stores
    anything in ``self.jobs``; it only hands back a job handle.

    Args:
        eval_id: Identifier of the eval task to run (currently unused).
        eval_config: Configuration for the run (currently unused).

    Returns:
        A ``Job`` whose ``job_id`` is the current number of stored jobs,
        rendered as a string.
    """
    # TODO: implement the actual run:
    #   1. convert EvalTaskConfig to the FiddleCube API input
    #   2. call the FiddleCube API to run the red-teaming
    #   3. convert the FiddleCube response to an EvaluateResponse
    #   4. store it: self.jobs[job_id] = <EvaluateResponse>
    # See llama_stack/providers/inline/eval/meta_reference/eval.py for a
    # reference implementation of run_eval.

    # job_status() and job_result() take ``job_id: str`` and use it as the
    # key into self.jobs, so the id handed out here must be a string too.
    job_id = str(len(self.jobs))
    return Job(job_id=job_id)

async def evaluate_rows(
self, eval_id: str, input_rows: List[Dict[str, Any]], scoring_functions: List[str], eval_config: EvalTaskConfig

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quick question: how are we passing the attack candidate and target candidate + prompt itself?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the data model is defined here:

class BenchmarkEvalTaskConfig(BaseModel):

We will use the `run_eval` method, not the `evaluate_rows` method, in the API.

) -> EvaluateResponse:
raise NotImplementedError("FiddleCube Red Teaming Adapter does not support evaluate_rows")

async def job_status(self, eval_id: str, job_id: str) -> JobStatus:
    """Report the status of a job held by this adapter.

    Args:
        eval_id: Identifier of the eval task (not consulted here).
        job_id: Key to look up in ``self.jobs``.

    Returns:
        ``JobStatus.completed`` when ``job_id`` is a key of ``self.jobs``.
        For any other id the result is ``None``, even though the return
        annotation says ``JobStatus``.
    """
    is_known = job_id in self.jobs
    return JobStatus.completed if is_known else None

async def job_result(self, eval_id: str, job_id: str) -> EvaluateResponse:
    """Return the stored result of a completed job.

    Args:
        eval_id: Identifier of the eval task; forwarded to ``job_status``.
        job_id: Key of the job in ``self.jobs``.

    Returns:
        The value stored in ``self.jobs`` under ``job_id``.

    Raises:
        ValueError: If ``job_status`` does not report the job as completed,
            including when it reports no status at all (unknown job id).
    """
    status = await self.job_status(eval_id, job_id)
    if status is None or status != JobStatus.completed:
        # job_status() yields None for unknown ids; reading ``.value`` on it
        # would raise AttributeError and hide the intended ValueError.
        status_text = None if status is None else status.value
        raise ValueError(f"Job is not completed, Status: {status_text}")

    return self.jobs[job_id]

async def job_cancel(self, eval_id: str, job_id: str) -> None:
    """Reject a cancellation request; this adapter cannot cancel jobs.

    Args:
        eval_id: Identifier of the eval task (unused).
        job_id: Identifier of the job (unused).

    Raises:
        NotImplementedError: Always.
    """
    reason = "FiddleCube Red Teaming Adapter does not support job_cancel"
    raise NotImplementedError(reason)