Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,45 @@ public interface LlmAssistant {
"As a software architect, and based on your previous answer, generate other potential missing endpoints that are not mentioned in the OpenAPI file. For example, if there is GET /product/1, suggest DELETE /product/1 if it's not mentioned")
HttpRequestList complete();

@SystemMessage(
"You are a web application security expert reviewing potential false positives. Answer only in JSON.")
@UserMessage(
"Your task is to review the following finding from ZAP (Zed Attack Proxy).\n"
+ "The confidence level is a pull down field which allows you to specify how confident you are in the validity of the finding : \n"
+ "- 0 if it's False Positive\n"
+ "- 1 if it's Low\n"
+ "- 2 if it's Medium\n"
+ "- 3 if it's High\n"
+ "\n"
+ "The alert is described as follows : {{description}}\n"
+ "\n"
+ "As evidence, the HTTP response contains :\n"
+ "---\n"
+ "{{evidence}}\n"
+ "---\n"
+ "Provide a short consistent explanation of the new score.\n")
/** System prompt: act as a web application security expert and answer only in JSON. */
static final String ALERT_REVIEW_SYSTEM_MSG =
        "You are a web application security expert reviewing potential false positives. Answer only in JSON.";

/** Closing instruction, appended exactly once at the very end of each review prompt. */
static final String ALERT_REVIEW_GOAL =
        "Provide a short consistent explanation of the new score.\n";

/** Shared body of the review prompts, WITHOUT the closing goal instruction. */
static final String ALERT_REVIEW_BODY =
        """
        Your task is to review the following finding from ZAP (Zed Attack Proxy).
        The confidence level is a pull down field which allows you to specify how confident you are in the validity of the finding:
        - 0 if it's False Positive
        - 1 if it's Low
        - 2 if it's Medium
        - 3 if it's High
        The alert is described as follows: {{description}}

        As evidence, the HTTP message contains:
        ---
        {{evidence}}
        ---
        """;

/** Prompt used when the alert has no "other info". */
static final String ALERT_REVIEW_PROMPT = ALERT_REVIEW_BODY + ALERT_REVIEW_GOAL;

/**
 * Prompt used when the alert carries "other info". Built from the goal-free body so the goal
 * instruction appears only once, at the end — building on ALERT_REVIEW_PROMPT (which already
 * ends with the goal) would repeat it mid-prompt before the additional information.
 */
static final String ALERT_REVIEW_OTHERINFO_PROMPT =
        ALERT_REVIEW_BODY
                + """
                Also, here's some additional information that may be useful for you to reach your conclusion:
                ---
                {{otherinfo}}
                ---
                """
                + ALERT_REVIEW_GOAL;

/**
 * Asks the LLM to re-assess an alert's confidence level from its description and evidence.
 *
 * @param description the alert description, substituted into the {{description}} template slot
 * @param evidence the HTTP message excerpt supporting the alert ({{evidence}} slot)
 * @return the model's suggested confidence level with a short explanation
 */
@SystemMessage(ALERT_REVIEW_SYSTEM_MSG)
@UserMessage(ALERT_REVIEW_PROMPT)
Confidence review(@V("description") String description, @V("evidence") String evidence);

/**
 * Asks the LLM to re-assess an alert's confidence level, additionally supplying the alert's
 * "other info" text to help the model reach its conclusion.
 *
 * @param description the alert description ({{description}} template slot)
 * @param evidence the HTTP message excerpt supporting the alert ({{evidence}} slot)
 * @param otherinfo additional context for the finding ({{otherinfo}} slot)
 * @return the model's suggested confidence level with a short explanation
 */
@SystemMessage(ALERT_REVIEW_SYSTEM_MSG)
@UserMessage(ALERT_REVIEW_OTHERINFO_PROMPT)
Confidence review(
@V("description") String description,
@V("evidence") String evidence,
@V("otherinfo") String otherinfo);
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -75,6 +76,11 @@ public LlmCommunicationService(LlmOptions options) {
requestor = new Requestor(HttpSender.MANUAL_REQUEST_INITIATOR, new HistoryPersister());
}

/**
 * Creates the service with a pre-built assistant, bypassing model construction.
 *
 * <p>For testing purposes only — package-private so only tests in this package can inject a
 * mocked {@link LlmAssistant}.
 */
LlmCommunicationService(LlmAssistant assistant) {
this.llmAssistant = assistant;
}

private ChatLanguageModel buildModel(LlmOptions options) {
return switch (options.getModelProvider()) {
case AZURE_OPENAI ->
Expand Down Expand Up @@ -170,7 +176,13 @@ public void reviewAlert(Alert alert) {
LOGGER.debug("Reviewing alert : {}", alert.getName());
LOGGER.debug("Confidence level from ZAP : {}", alert.getConfidence());
Stats.incCounter("stats.llm.alertreview.call");
llmConfidence = llmAssistant.review(alert.getDescription(), alert.getEvidence());
if (alert.getOtherInfo().isBlank()) {
llmConfidence = llmAssistant.review(alert.getDescription(), alert.getEvidence());
} else {
llmConfidence =
llmAssistant.review(
alert.getDescription(), alert.getEvidence(), alert.getOtherInfo());
}

if (llmConfidence.getLevel() == alert.getConfidence()) {
Stats.incCounter("stats.llm.alertreview.result.same");
Expand All @@ -184,7 +196,7 @@ public void reviewAlert(Alert alert) {
llmConfidence.getExplanation());
updatedAlert.setConfidence(llmConfidence.getLevel());
updatedAlert.setOtherInfo(getUpdatedOtherInfo(alert, llmConfidence));
Map<String, String> alertTags = alert.getTags();
Map<String, String> alertTags = new HashMap<>(alert.getTags());

alertTags.putIfAbsent(AI_REVIEWED_TAG_KEY, "");
updatedAlert.setTags(alertTags);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,35 @@

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasEntry;
import static org.hamcrest.Matchers.is;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.BDDMockito.given;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import java.util.Map;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.parosproxy.paros.Constant;
import org.parosproxy.paros.core.scanner.Alert;
import org.zaproxy.addon.llm.communication.Confidence;
import org.zaproxy.zap.testutils.TestUtils;
import org.zaproxy.zap.utils.I18N;

/** Unit test for {@link LlmCommunicationService}. */
class LlmCommunicationServiceUnitTest extends TestUtils {

// Canned confidence (medium level + explanation) returned by the mocked assistant in all
// review tests; medium matches the confidence of the alert built by createBaseAlert().
private static final Confidence CONFIDENCE =
new Confidence(Alert.CONFIDENCE_MEDIUM, "explanation");

/**
 * Installs a mocked I18N instance so code under test can resolve messages without a full ZAP
 * initialization.
 */
@BeforeAll
static void beforeAll() {
Constant.messages = mock(I18N.class);
}

@Test
void shouldNotBeConsideredReviewdIfNoTags() {
// Given
Expand Down Expand Up @@ -62,4 +81,47 @@ void shouldBeConsideredReviewdIfMarkedAsSuch() {
// Then
assertThat(result, is(equalTo(true)));
}

/**
 * When the alert's otherInfo is blank (empty or whitespace only), the service must call the
 * two-parameter {@code review} overload and tag the alert as AI reviewed.
 */
@ParameterizedTest
@ValueSource(strings = {"", " ", "\t", "\r", "\n"})
void shouldUseTwoParamReviewMethodWhenNoOtherInfo(String otherInfo) {
// Given
LlmAssistant assistant = mock();
LlmCommunicationService service = new LlmCommunicationService(assistant);

Alert alert = createBaseAlert();
alert.setOtherInfo(otherInfo);

given(assistant.review(anyString(), anyString())).willReturn(CONFIDENCE);
// When
service.reviewAlert(alert);
// Then
// Verify with the exact values (from createBaseAlert()) to ensure the alert's description
// and evidence are passed through unchanged, not just that some strings were passed.
verify(assistant).review("desc", "evidence");
assertThat(alert.getTags(), hasEntry(LlmCommunicationService.AI_REVIEWED_TAG_KEY, ""));
}

/**
 * When the alert carries non-blank otherInfo, the service must call the three-parameter
 * {@code review} overload (forwarding the otherInfo) and tag the alert as AI reviewed.
 */
@Test
void shouldUseThreeParamReviewMethodWhenHasOtherInfo() {
// Given
LlmAssistant assistant = mock();
LlmCommunicationService service = new LlmCommunicationService(assistant);

Alert alert = createBaseAlert();
alert.setOtherInfo("other info");

given(assistant.review(anyString(), anyString(), anyString())).willReturn(CONFIDENCE);
// When
service.reviewAlert(alert);
// Then
// Verify with the exact values to ensure description, evidence, and otherInfo are all
// forwarded unchanged, not just that the three-argument overload was hit.
verify(assistant).review("desc", "evidence", "other info");
assertThat(alert.getTags(), hasEntry(LlmCommunicationService.AI_REVIEWED_TAG_KEY, ""));
}

/**
 * Builds the minimal alert used by the review tests: fixed description ("desc"), evidence
 * ("evidence"), and medium confidence (matching {@code CONFIDENCE}).
 */
private static Alert createBaseAlert() {
return Alert.builder()
.setConfidence(Alert.CONFIDENCE_MEDIUM)
.setEvidence("evidence")
.setDescription("desc")
.build();
}
}