Skip to content

Commit 59fd500

Browse files
authored
Merge pull request #14 from RISE-UNIBAS/mistral-ai
mistral-ai
2 parents 40307f6 + b54a74e commit 59fd500

File tree

4 files changed

+73
-30
lines changed

4 files changed

+73
-30
lines changed

benchmarks/benchmarks_tests.csv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,5 @@ T18,metadata_extraction,anthropic,claude-3-5-sonnet-20241022,Document,0.0,You ar
2020
T19,metadata_extraction,genai,gemini-2.5-pro-exp-03-25,Document,0.0,You are a historian with keyword knowledge and an expert in the field of 20th century Swiss history,prompt.txt,false
2121
T20,metadata_extraction,genai,gemini-2.0-flash-lite,Document,0.0,You are a historian with keyword knowledge and an expert in the field of 20th century Swiss history,prompt.txt,false
2222
T21,metadata_extraction,genai,gemini-2.0-pro-exp-02-05,Document,0.0,You are a historian with keyword knowledge and an expert in the field of 20th century Swiss history,prompt.txt,false
23-
T22,fraktur,genai,gemini-2.5-pro-exp-03-25,"",0.0,You are a historian with keyword knowledge and an expert in the field of 20th century Swiss history,prompt.txt,false
23+
T22,fraktur,genai,gemini-2.5-pro-exp-03-25,"",0.0,You are a historian with keyword knowledge and an expert in the field of 20th century Swiss history,prompt.txt,false
24+
T23,metadata_extraction,mistral,pixtral-large-latest,Document,0.0,You are a historian with keyword knowledge and an expert in the field of 20th century Swiss history. You only return valid JSON and no other text.,prompt.txt,false

scripts/__init__.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
"""Package-level logging setup: log to a timestamped file and to the console.

Importing this package configures the root logger once, writing every record
both to ``logs/<YYYYmmdd-HHMMSS>.log`` and to stderr.
"""

import logging
import os
import time

# Ensure the logs directory exists. exist_ok=True avoids the
# check-then-create race of `if not os.path.exists(...): os.makedirs(...)`.
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)

# Configure logging. The level is overridable via the LOG_LEVEL environment
# variable (e.g. DEBUG); it defaults to INFO, so existing behavior is kept.
logging.basicConfig(
    level=os.getenv("LOG_LEVEL", "INFO").upper(),
    format="%(asctime)s %(levelname)s:%(name)s:%(message)s",
    handlers=[
        # One log file per process start, named by the start timestamp.
        logging.FileHandler(
            os.path.join(log_dir, f"{time.strftime('%Y%m%d-%H%M%S')}.log")
        ),
        # Mirror everything to the console as well.
        logging.StreamHandler(),
    ],
)

logger = logging.getLogger(__name__)

scripts/benchmark_base.py

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def is_runnable(self) -> bool:
5959
if not os.path.exists(os.path.join(self.benchmark_dir, "ground_truths")):
6060
logging.error(f"Ground truths directory not found: {self.benchmark_dir}")
6161
return False
62-
if not self.provider in ["openai", "genai", "anthropic"]:
62+
if not self.provider in ["openai", "genai", "anthropic", "mistral"]:
6363
logging.error(f"Invalid provider: {self.provider}")
6464
return False
6565
if not self.model:
@@ -74,7 +74,7 @@ def load_prompt(self) -> str:
7474
logging.debug(f"Loaded prompt from {prompt_path}")
7575
if self.has_file_information:
7676
try:
77-
kwargs = {} # Add file information here
77+
kwargs = {} # Add file information here
7878
return prompt.format(**kwargs)
7979
except KeyError as e:
8080
return prompt
@@ -106,7 +106,6 @@ def load_ground_truth(self,
106106
return {"error": "Invalid JSON format."}
107107
return {"response_text": ground_truth_text}
108108

109-
110109
def ask_llm(self,
111110
image_paths: list[str]) -> dict:
112111
""" Ask the language model a question. """
@@ -135,7 +134,7 @@ def get_request_answer_path(self):
135134

136135
def get_request_answer_file_name(self, image_name):
137136
""" Get the path to the answer file. """
138-
return os.path.join(self.get_request_answer_path(), self.get_request_name(image_name)+".json")
137+
return os.path.join(self.get_request_answer_path(), self.get_request_name(image_name) + ".json")
139138

140139
def get_request_render_path(self):
141140
date_str = datetime.now().strftime('%Y-%m-%d')
@@ -159,7 +158,7 @@ def save_request_answer(self,
159158
logging.info(f"Saved answer to {file_name}")
160159

161160
def save_benchmark_score(self,
162-
score: dict) -> None:
161+
score: dict) -> None:
163162
""" Save the benchmark score to a file. """
164163
date_str = datetime.now().strftime('%Y-%m-%d')
165164
save_path = os.path.join('..', "results", date_str, self.id, "scoring.json")
@@ -237,7 +236,7 @@ def run(self, regenerate_existing_results=True):
237236
image_paths = [os.path.join(images_dir, img) for img in img_files]
238237

239238
if (regenerate_existing_results and os.path.exists(self.get_request_answer_file_name(image_name))) or \
240-
(not os.path.exists(self.get_request_answer_file_name(image_name))):
239+
(not os.path.exists(self.get_request_answer_file_name(image_name))):
241240
logging.info(f"Processing {self.id}, {image_name}...")
242241
answer = self.ask_llm(image_paths)
243242
self.save_request_answer(image_name, answer)
@@ -255,7 +254,6 @@ def run(self, regenerate_existing_results=True):
255254
benchmark_score = self.score_benchmark(benchmark_scores)
256255
self.save_benchmark_score(benchmark_score)
257256

258-
259257
def get_request_name(self, image_name: str) -> str:
260258
""" Get the name of the request. """
261259
return f"request_{self.id}_{os.path.splitext(image_name)[0]}"
@@ -271,9 +269,9 @@ def create_request_render(self,
271269

272270
@abstractmethod
273271
def score_request_answer(self,
274-
image_name: str,
275-
response: dict,
276-
ground_truth: dict) -> dict:
272+
image_name: str,
273+
response: dict,
274+
ground_truth: dict) -> dict:
277275
""" Score the response. """
278276
pass
279277

@@ -327,18 +325,18 @@ def score_benchmark(self, all_scores):
327325
return {"score": "niy"}
328326

329327
def score_request_answer(self,
330-
image_name: str,
331-
response: dict,
332-
ground_truth: dict) -> dict:
328+
image_name: str,
329+
response: dict,
330+
ground_truth: dict) -> dict:
333331
""" Score the response. """
334332
return {}
335333

336334
def create_request_render(self,
337-
image_name: str,
338-
result: dict,
339-
score: dict,
340-
truth) -> str:
341-
""" Create a markdown render of the request. """
342-
return ("### Result for image: {image_name}"
343-
"\n\n"
344-
"no details available")
335+
image_name: str,
336+
result: dict,
337+
score: dict,
338+
truth) -> str:
339+
""" Create a markdown render of the request. """
340+
return ("### Result for image: {image_name}"
341+
"\n\n"
342+
"no details available")

scripts/simple_ai_clients.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Simple AI API client for OpenAI, GenAI, and Anthropic."""
1+
"""Simple AI API client for OpenAI, GenAI, Anthropic, and Mistral AI."""
22
import base64
33
from dataclasses import asdict
44
from datetime import datetime
@@ -7,13 +7,16 @@
77
import google.generativeai as genai
88
from openai import OpenAI
99
from anthropic import Anthropic
10+
from mistralai import Mistral
11+
1012

1113
class AiApiClient:
1214
"""Simple AI API client for OpenAI, GenAI, and Anthropic."""
1315

1416
SUPPORTED_APIS = ['openai',
1517
'genai',
16-
'anthropic']
18+
'anthropic',
19+
'mistral']
1720

1821
api_client = None
1922
image_resources = []
@@ -52,6 +55,11 @@ def init_client(self):
5255
api_key=self.api_key,
5356
)
5457

58+
if self.api == 'mistral':
59+
self.api_client = Mistral(
60+
api_key=self.api_key
61+
)
62+
5563
@property
5664
def elapsed_time(self):
5765
"""Return the elapsed time since the client was initialized."""
@@ -144,6 +152,28 @@ def prompt(self, model, prompt):
144152
)
145153
answer = message
146154

155+
if self.api == 'mistral':
156+
content = [{"type": "text", "text": prompt}]
157+
for img_path in self.image_resources:
158+
with open(img_path, "rb") as image_file:
159+
base64_image = base64.b64encode(image_file.read()).decode("utf-8")
160+
data_uri = f"data:image/jpeg;base64,{base64_image}"
161+
content.append({
162+
"type": "image_url",
163+
"image_url": {
164+
"url": data_uri
165+
}
166+
})
167+
168+
message = self.api_client.chat.complete(
169+
messages=[{
170+
"role": "user",
171+
"content": content,
172+
}],
173+
model=model,
174+
)
175+
answer = message
176+
147177
end_time = time.time()
148178
elapsed_time = end_time - prompt_start
149179
return self.create_answer(answer, elapsed_time, model)
@@ -169,6 +199,8 @@ def create_answer(self, response, elapsed_time, model):
169199
answer['response_text'] = response.text
170200
elif self.api == 'anthropic':
171201
answer['response_text'] = response.content[0].text
202+
elif self.api == 'mistral':
203+
answer['response_text'] = response.choices[0].message.content
172204

173205
return answer
174206

@@ -184,4 +216,7 @@ def get_model_list(self):
184216
return genai.list_models()
185217

186218
if self.api == 'anthropic':
187-
return self.api_client.models.list()
219+
return self.api_client.models.list()
220+
221+
if self.api == 'mistral':
222+
return self.api_client.models.list()

0 commit comments

Comments
 (0)