
Commit 22d34ac

Merge pull request #6 from seanpedrick-case/dev
Corrected the Gemma model prompt format and made prompts in general ask for more detail. Wording changes.
2 parents: 91e0b76 + a0e9486

File tree

5 files changed, +20 −15 lines


app.py

Lines changed: 2 additions & 2 deletions
@@ -223,7 +223,7 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c

 gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")

-gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}, if available), can reason a little better, but is much slower (See Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded.If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
+gr.Markdown(f"""Chat with PDFs, web pages or data files (.csv / .xlsx). The default is a small model ({SMALL_MODEL_NAME}) that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise, the document. Go to Advanced settings to change the model, e.g. to one of the Gemini models available on [their very generous free tier](https://ai.google.dev/gemini-api/docs/pricing) (needs an API key), or to AWS Bedrock/larger local models if activated.\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded as a data source. If you want to query another data source, please upload it on the 'Change data source' tab. If switching topic, please click the 'Clear chat' button. 'Stop generating' will halt the language model during its response.\n\n**Caution: On Hugging Face, this is a public app. Please ensure that the document you upload is not sensitive in any way, as other users may see it!** Also, please note that AI chatbots may give incomplete or incorrect information, so please use with care and verify any outputs before further use.""")

 with gr.Row():
     current_source = gr.Textbox(label="Current data source(s)", value=DEFAULT_DATA_SOURCE, scale = 10)
@@ -252,7 +252,7 @@ def load_model(model_type:str, gpu_layers:int, gpu_config:dict=gpu_config, cpu_c

 current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)

-with gr.Tab("Load in a different file/webpage"):
+with gr.Tab("Change data source"):
     with gr.Accordion("PDF file", open = False):
         in_pdf = gr.File(label="Upload pdf", file_count="multiple", file_types=['.pdf'])
         load_pdf = gr.Button(value="Load in file", variant="secondary", scale=0)

chatfuncs/chatfuncs.py

Lines changed: 6 additions & 10 deletions
@@ -32,7 +32,7 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.docstore.document import Document

-from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma
+from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma, instruction_prompt_template_gemini_aws
 from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
 from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS, FEEDBACK_LOGS_FOLDER
@@ -136,11 +136,11 @@ def base_prompt_templates(model_type:str = SMALL_MODEL_NAME):
 # The main prompt:

 if model_type == SMALL_MODEL_NAME:
-    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_qwen, input_variables=['question', 'summaries'])
+    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_gemma, input_variables=['question', 'summaries'])
 elif model_type == LARGE_MODEL_NAME:
     INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_phi3, input_variables=['question', 'summaries'])
 else:
-    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_orca, input_variables=['question', 'summaries'])
+    INSTRUCTION_PROMPT=PromptTemplate(template=instruction_prompt_template_gemini_aws, input_variables=['question', 'summaries'])

 return INSTRUCTION_PROMPT, CONTENT_PROMPT
@@ -507,7 +507,6 @@ def produce_streaming_answer_chatbot(
             new_text = ""
         history[-1]['content'] += new_text
         NUM_TOKENS += 1
-        history[-1]['content'] = history[-1]['content'].replace('<|im_end|>','')
         yield history
 except Exception as e:
     print(f"Error during text generation: {e}")
@@ -543,7 +542,6 @@ def produce_streaming_answer_chatbot(
 if "choices" in out and len(out["choices"]) > 0 and "text" in out["choices"][0]:
     history[-1]['content'] += out["choices"][0]["text"]
     NUM_TOKENS+=1
-    history[-1]['content'] = history[-1]['content'].replace('<|im_end|>','')
     yield history
 else:
     print(f"Unexpected output structure: {out}")
@@ -557,7 +555,7 @@ def produce_streaming_answer_chatbot(
     print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')

 elif "claude" in model_type:
-    system_prompt = "You are answering questions from the user based on source material. Respond with short, factually correct answers."
+    system_prompt = "You are answering questions from the user based on source material. Make sure to fully answer the questions with all required detail."

     print("full_prompt:", full_prompt)
@@ -595,13 +593,11 @@ def produce_streaming_answer_chatbot(
 elif GEMINI_API_KEY: gemini_api_key = GEMINI_API_KEY
 else: raise Exception("Gemini API key not found. Please enter a key on the Advanced settings page or select another model type")

-print("Using Gemini model:", model_type)
-print("full_prompt:", full_prompt)

 if isinstance(full_prompt, str):
     full_prompt = [full_prompt]

-system_prompt = "You are answering questions from the user based on source material. Respond with short, factually correct answers."
+system_prompt = "You are answering questions from the user based on source material. Make sure to fully answer the questions with all required detail."

 model, config = construct_gemini_generative_model(gemini_api_key, temperature, model_type, system_prompt, max_tokens)
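The Gemini branch now asks for fully detailed answers rather than short ones, and the debug prints of the model name and full prompt are gone. For orientation, a minimal sketch of a streaming Gemini call with a system prompt of this kind, assuming the google-generativeai SDK; the model name and generation settings below are illustrative assumptions, and the repo's own construct_gemini_generative_model helper may be set up differently:

import google.generativeai as genai  # sketch only, not the repo's helper

genai.configure(api_key="YOUR_GEMINI_API_KEY")  # assumption: key taken from Advanced settings or env

system_prompt = ("You are answering questions from the user based on source material. "
                 "Make sure to fully answer the questions with all required detail.")

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",  # illustrative model name, not necessarily the app's default
    system_instruction=system_prompt,
    generation_config={"temperature": 0.1, "max_output_tokens": 1024},  # illustrative values
)

# Stream the response so the chat UI can yield partial answers as they arrive.
for chunk in model.generate_content("QUESTION: ...\nCONTENT: ...", stream=True):
    print(chunk.text, end="", flush=True)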

chatfuncs/config.py

Lines changed: 1 addition & 1 deletion
@@ -216,7 +216,7 @@ def add_folder_to_path(folder_path: str):

 DEFAULT_DATA_SOURCE = get_or_create_env_var('DEFAULT_DATA_SOURCE', "https://seanpedrick-case.github.io/doc_redaction/README.html")

-DEFAULT_EXAMPLES = get_or_create_env_var('DEFAULT_EXAMPLES', '[ "How can I make a custom deny list?", "How can I find page duplicates?", "How can I review and modify existing redactions?", "How can I export my review files to Adobe?"]')
+DEFAULT_EXAMPLES = get_or_create_env_var('DEFAULT_EXAMPLES', '[ "How can I make a custom deny list?", "How can I find duplicate pages in a document?", "How can I review and modify existing redactions?", "How can I export my review files to Adobe?"]')
 #
 # ') # ["What were the five pillars of the previous borough plan?",
 #"What is the vision statement for Lambeth?",

chatfuncs/prompts.py

Lines changed: 9 additions & 2 deletions
@@ -73,7 +73,14 @@
 Answer:<|im_end|>
 <|im_start|>assistant\n"""

-instruction_prompt_gemma = """Answer the QUESTION using information from the following CONTENT. Respond with short answers that directly answer the question.
+instruction_prompt_gemma = """<start_of_turn>user
+Answer the QUESTION using information from the following CONTENT. Make sure to fully answer the question with all required detail.
+CONTENT: {summaries}
+QUESTION: {question}<end_of_turn>
+<start_of_turn>model
+"""
+
+instruction_prompt_template_gemini_aws = """Answer the QUESTION using information from the following CONTENT. Make sure to fully answer the question with all required detail.
 CONTENT: {summaries}
 QUESTION: {question}
-assistant:"""
+Answer:"""

docker_build_run_commands.txt

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+docker build -t qa_chatbot .
+docker run -p 7860:7860 -e HF_TOKEN=<token> qa_chatbot  # HF_TOKEN is required to download Gemma 3
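Gemma models are gated on Hugging Face, so the token passed in needs to belong to an account that has accepted the model's licence; substitute your own token for the <token> placeholder.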
