punk-security · SimonGurney · May 13, 2025 · May 13, 2025 · May 13, 2025 · github-actions
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@
 **/__pycache__
 .vscode/**
 findings.csv
+apps/**
diff --git a/saist.personalities b/saist.personalities
@@ -0,0 +1,44 @@
+typos:
+    prompt: |
+        You are a code reviewer analyzing a single file's diff from a Pull Request. 
+        Your task is to identify typos.
+        Only report typos. Return nothing if no typos are found.
+    priority: 5
+security: 
+    prompt: | 
+        You are a security reviewer analyzing a single file's diff from a Pull Request. 
+        Only identify confirmed, high-confidence vulnerabilities introduced or modified in the diff.
+
+        # Strict rules:
+
+        Do not report vague or speculative issues like "potential path traversal" or "hardcoded secrets" unless they are clearly 
+        exploitable and directly related to the categories above.
+
+        Do not report issues based only on pattern-matching or tool output—require code context and confirmation.
+        Retrieve the full file and other relevant files for context only after a suspicious change is detected in the diff.
+
+        Provide a severity rating from 1 to 9 (9 is most critical).
+        Only report confirmed, context-aware vulnerabilities within the scope defined above.
+    priority: 1
+codequality: 
+    prompt: |
+        You are a code reviewer analyzing a single file's diff from a Pull Request. 
+        Your task is to identify bad development patterns introduced or modified in the diff.
+        Focus only on poor coding practices that may lead to long-term maintainability, reliability, or readability issues. 
+        Do not report security vulnerabilities or speculative risks.
+
+        Rules:
+        Only analyze changes in the diff. Ignore unchanged code or tool-generated output.
+        Retrieve the full file or other files for context only if needed to confirm the presence of a bad pattern.
+        Do not flag stylistic or formatting issues unless they reflect a deeper anti-pattern.
+        Examples of bad development patterns include:
+        Copy-pasted logic instead of reusable code
+        Excessive code nesting or deeply nested conditionals
+        Catch-all exception handling (e.g., catch(Exception) without handling)
+        Business logic in controllers or views
+        Logic dependent on hardcoded values where abstraction is expected
+        Functions or classes that are too long or do too much
+        Use of magic numbers or unclear naming
+
+        Only report confirmed, code-level development anti-patterns present in the diff.
+    priority: 3
diff --git a/saist/main.py b/saist/main.py
@@ -20,7 +20,7 @@
 from scm.adapters.git import GitAdapter
 from util.git import parse_unified_diff
 from util.filtering import should_process
-from util.prompts import prompts
+from util import prompts
 from scm.adapters.github import Github
 from scm import Scm
 from shell import Shell
@@ -31,7 +31,6 @@
 
 from util.output import print_banner, write_csv
 
-prompts = prompts()
 load_dotenv(".env")
 
 logger = logging.getLogger("saist")
@@ -40,22 +39,24 @@ async def analyze_single_file(scm: Scm, adapter: BaseLlmAdapter, filename, patch
     """
     Analyzes a SINGLE file diff with OpenAI, returning a Findings object or None on error.
     """
-    system_prompt = prompts.DETECT
     logger.debug(f"Processing {filename}")
     prompt = (
         f"\n\nFile: {filename}\n{patch_text}\n"
     )
-    try:
-        return (await adapter.prompt_structured(system_prompt, prompt, Findings, [scm.read_file_contents])).findings
-    except Exception as e:
-        logger.error(f"[Error] File '{filename}': {e}")
-        return None
+    findings = []
+    for analyst in prompts.analysts.keys():
+        system_prompt = prompts.analysts[analyst].PROMPT
+        try:
+            findings += (await adapter.prompt_structured(system_prompt, prompt, Findings, [scm.read_file_contents])).findings
+        except Exception as e:
+            logger.error(f"[Error] File '{filename}': {e}")
+    return findings
 
 def generate_summary_from_findings(adapter: BaseLlmAdapter, findings: list[Finding]) -> str:
     """
     Uses OpenAI to generate a summary of all findings to be used as the PR review body.
     """
-    system_prompt = prompts.SUMMARY
+    system_prompt = prompts.summary_writer.PROMPT
     for f in findings:
         prompt = f"- **File**: `{f.file}`\n  - **Issue**: {f.issue}\n  - **Recommendation**: {f.recommendation}\n\n"
 
@@ -212,8 +213,16 @@ async def main():
 
         # Basic checks
         if not file_name or not snippet or not issue:
+            logging.debug("validation error for item")
+            item.line_number = -1
+            continue
+        if "\n" in snippet:
+            logging.debug("Code snippet contains multiple lines")
+            item.line_number = -1
             continue
         if file_name not in file_line_maps:
+            logging.debug(f"{file_name} does not exist...")
+            item.line_number = -1
             # Possibly flagged a file that doesn't exist in the PR
             continue
 
@@ -228,6 +237,7 @@ async def main():
                 break
 
         if not matched_new_line:
+            logging.debug(f"Line '{snippet}' does not exist...")
             # If we can't find the snippet in the patch, skip
             item.line_number = -1
             continue
@@ -252,8 +262,29 @@ async def main():
     all_findings = list([x for x in all_findings if x.line_number != -1])
 
     if not all_findings:
-        print("No issues detected")
+        print("Followig validation, no valid issues detected")
         exit(0)
+
+    print(f"✨ Validation complete! Identified {len(all_findings)} issues.\n")
+
+    # Deduplicate all_findings based on (file, line_number, cwe)
+
+    seen = set()
+    deduped_findings = []
+
+    for finding in all_findings:
+        if finding.cwe == "N/A":
+            deduped_findings.append(finding)
+            continue
+        key = (finding.file, finding.line_number, finding.cwe)
+        if key not in seen:
+            seen.add(key)
+            deduped_findings.append(finding)
+
+    all_findings = deduped_findings
+
+    print(f"🚀 Deduplication complete! Identified {len(all_findings)} issues.\n")
+
 
     if args.interactive:
         s = Shell(llm, scm, all_findings)

diff --git a/saist/models.py b/saist/models.py
@@ -3,7 +3,8 @@
 
 class Finding(BaseModel):
     file: str
-    snippet: Annotated[str, Field(description= "a single line code snipper containing the security issue") ]
+    category: str
+    snippet: Annotated[str, Field(description= "the single line of code snippet from the file most relevant to the detected issue") ]
     issue: str
     recommendation: str
     cwe: str

diff --git a/saist/util/prompts.py b/saist/util/prompts.py
@@ -1,32 +1,61 @@
-class prompts():
-    SUMMARY_PRE = """
+import os
+import yaml
+
+class personality():
+    """An LLM persona: a prompt body, an optional prompt suffix, and a priority."""
+
+    def __init__(self, prompt_body, prompt_suffix = None, priority = None):
+        """Store the prompt parts and the priority.
+
+        prompt_body: main instruction text.
+        prompt_suffix: optional text that PROMPT appends to the body.
+        priority: any falsy value (None, 0) falls back to 1.
+        """
+        self.prompt_body = prompt_body
+        self.prompt_suffix = prompt_suffix
+        self.priority = priority or 1
+
+    @property
+    def PROMPT(self):
+        """Return the body followed by the suffix, or the body alone if no suffix was given."""
+        # prompt_suffix defaults to None; concatenating None to a str raises TypeError.
+        if self.prompt_suffix is None:
+            return self.prompt_body
+        return self.prompt_body + self.prompt_suffix
+
+# Shared tail of each analyst system prompt (see `analysts` below); tells the
+# model how the per-file input is laid out.
+FILE_ANALYSIS_COMMON_SUFFIX = "Below is the diff for this single file. It starts with 'File: <filename>' followed by the unified diff.\n"
+
+summary_writer = personality(
+    prompt_body = """
     You are a senior application security engineer.
     Given the following list of findings (issue descriptions and recommendations)
     Write a concise but informative summary suitable for a GitHub Pull Request review comment.
     It should be just a few sentences.
     Group similar issues, and prioritize by severity. Use markdown formatting.
     Return only the markdown summary, no other text. Do not put the markdown inside ```
-    """
-    SUMMARY_POST = """
-    findings:
-    """
-    DETECT_PRE = """
-    You are a security reviewer analyzing a single file's diff from a Pull Request.
-    Look for issues in the OWASP top ten. Identify as many as you can.
-    Report multiple issues per line as seperate findings.
-    When you detect a vulnerability get the full file by retrieving its contents, use this for context.
-    You can also retrieve other files for context as needed.
-    Only report a vulnerability if exists in the original diff.
-    Do not report vulnerabilities that exist only in tool output
-    Provide a vulnerability priority between 1 and 9. 9 is most critical
-    Map each finding to a Common Weakness Enumeration ID (CWE).
-    """
-    DETECT_POST = """"
-        Below is the diff for this single file. It starts with 'File: <filename>' followed by the unified diff.\n"
-    """
-    @property
-    def SUMMARY(self):
-        return self.SUMMARY_PRE + self.SUMMARY_POST
-    @property
-    def DETECT(self):
-        return self.DETECT_PRE + self.DETECT_POST
+    """,
+    prompt_suffix = "findings:" 
+)
+
+def load_personalities(file_path='saist.personalities'):
+    """Load and validate analyst personalities from a YAML file.
+
+    The file must contain a top-level mapping whose values are themselves
+    mappings holding both a 'priority' and a 'prompt' key.
+
+    Returns the parsed mapping unchanged.
+    Raises FileNotFoundError if file_path does not exist, and ValueError if
+    the YAML cannot be parsed or fails the structural checks above.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File '{file_path}' not found.")
+
+    # Decode as UTF-8 explicitly: prompt text may contain non-ASCII characters
+    # and the platform default encoding is not UTF-8 everywhere.
+    with open(file_path, 'r', encoding='utf-8') as file:
+        try:
+            personalities = yaml.safe_load(file)
+        except yaml.YAMLError as e:
+            # Chain the parser error so the original traceback is preserved.
+            raise ValueError(f"Error parsing YAML file: {e}") from e
+
+    if not isinstance(personalities, dict):
+        raise ValueError("YAML content is not a dictionary.")
+
+    for item_name, item_data in personalities.items():
+        if not isinstance(item_data, dict):
+            raise ValueError(f"Item '{item_name}' must be a dictionary.")
+        # Checked in this order so the reported missing field matches the original behaviour.
+        for required in ('priority', 'prompt'):
+            if required not in item_data:
+                raise ValueError(f"Item '{item_name}' is missing required field: '{required}'")
+
+    return personalities
+
+# Loaded once at import time from the default path, which is relative to the
+# current working directory.
+personalities_dict = load_personalities()
+
+# One personality per configured analyst. The suffix tells the model how to
+# fill the Category and CWE fields, then introduces the diff that follows.
+# The ".\n" ends the CWE sentence so it does not run into the common suffix.
+analysts = {
+    name: personality(
+        data["prompt"],
+        f"Set the Category to {name}. Set CWE to the format CWE-XXX or N/A if a CWE is not relevant.\n"
+        + FILE_ANALYSIS_COMMON_SUFFIX,
+        priority=data["priority"],
+    )
+    for name, data in personalities_dict.items()
+}