Commit 7d21953 · Parent: 556cc72
jedick committed: Handle tool calls with thinking enabled

Files changed:
- graph.py +15 -14
- main.py +2 -2
- prompts.py +16 -12
- requirements.txt +1 -1
graph.py
CHANGED
@@ -9,7 +9,7 @@ import os
 
 # Local modules
 from retriever import BuildRetriever
-from prompts import retrieve_prompt, answer_prompt, gemma_tools_template
+from prompts import query_prompt, generate_prompt, gemma_tools_template
 from mods.tool_calling_llm import ToolCallingLLM
 
 # Local modules

@@ -49,13 +49,14 @@ def print_message_summaries(messages, header):
 def normalize_messages(messages):
     """Normalize messages to sequence of types expected by chat templates"""
     # Copy the most recent HumanMessage to the end
-    # (avoids
+    # (avoids SmolLM and Qwen ValueError: Last message must be a HumanMessage!)
     if not type(messages[-1]) is HumanMessage:
         for msg in reversed(messages):
             if type(msg) is HumanMessage:
                 messages.append(msg)
+                break
     # Convert tool output (ToolMessage) to AIMessage
-    # (avoids
+    # (avoids SmolLM and Qwen ValueError: Unknown message type: <class 'langchain_core.messages.tool.ToolMessage'>)
     messages = [
         AIMessage(msg.content) if type(msg) is ToolMessage else msg for msg in messages
     ]

@@ -75,7 +76,7 @@ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False)
     Get a Hugging Face model ready for bind_tools().
     """
 
-    ## Add /no_think flag to turn off thinking mode (SmolLM3)
+    ## Add /no_think flag to turn off thinking mode (SmolLM3 and Qwen)
     # if not think:
     #     system_message = "/no_think\n" + system_message
 

@@ -203,14 +204,12 @@ def BuildGraph(
     # Add tools to the local or remote chat model
     is_local = hasattr(chat_model, "model_id")
     if is_local:
-        # For local
+        # For local models (ChatHuggingFace with SmolLM, Gemma, or Qwen)
         query_model = ToolifyHF(
-            chat_model, retrieve_prompt(compute_mode), "", think_retrieve
+            chat_model, query_prompt(compute_mode), "", think_retrieve
         ).bind_tools([retrieve_emails])
-        # Don't use answer_with_citations tool
-        generate_model = ToolifyHF(
-            chat_model, answer_prompt(with_tools=False), "", think_generate
-        )
+        # Don't use answer_with_citations tool because responses with it are sometimes unparseable
+        generate_model = chat_model
     else:
         # For remote model (OpenAI API)
         query_model = chat_model.bind_tools([retrieve_emails])

@@ -228,9 +227,7 @@ def BuildGraph(
             messages = normalize_messages(messages)
             print_message_summaries(messages, "--- query: after normalization ---")
         else:
-            messages = [SystemMessage(retrieve_prompt(compute_mode))] + state[
-                "messages"
-            ]
+            messages = [SystemMessage(query_prompt(compute_mode))] + state["messages"]
         response = query_model.invoke(messages)
 
         return {"messages": response}

@@ -241,9 +238,13 @@ def BuildGraph(
             messages = state["messages"]
             print_message_summaries(messages, "--- generate: before normalization ---")
             messages = normalize_messages(messages)
+            # Add the system message here because we're not using tools
+            messages = [
+                SystemMessage(generate_prompt(with_tools=False, think=False))
+            ] + messages
             print_message_summaries(messages, "--- generate: after normalization ---")
         else:
-            messages = [SystemMessage(answer_prompt())] + state["messages"]
+            messages = [SystemMessage(generate_prompt())] + state["messages"]
         response = generate_model.invoke(messages)
 
         return {"messages": response}
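The key fix in normalize_messages is the added break, which stops the reversed scan after copying only the most recent HumanMessage instead of appending every earlier one. A minimal runnable sketch of the patched helper, assuming only langchain-core is installed (the message contents and the trailing return are illustrative, not copied from the repo):

from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

def normalize_messages(messages):
    """Normalize messages to the sequence of types expected by chat templates."""
    # Copy the most recent HumanMessage to the end
    if not type(messages[-1]) is HumanMessage:
        for msg in reversed(messages):
            if type(msg) is HumanMessage:
                messages.append(msg)
                break  # the fix: only the most recent HumanMessage is copied
    # Convert tool output (ToolMessage) to AIMessage
    messages = [
        AIMessage(msg.content) if type(msg) is ToolMessage else msg
        for msg in messages
    ]
    return messages

msgs = [HumanMessage("Find emails about lm()"), ToolMessage("3 emails found", tool_call_id="1")]
print([type(m).__name__ for m in normalize_messages(msgs)])
# ['HumanMessage', 'AIMessage', 'HumanMessage']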
main.py
CHANGED
@@ -23,7 +23,7 @@ from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
 from index import ProcessFile
 from retriever import BuildRetriever, db_dir
 from graph import BuildGraph
-from prompts import answer_prompt
+from prompts import generate_prompt
 
 # -----------
 # R-help-chat

@@ -200,7 +200,7 @@ def RunChain(
     chat_model = GetChatModel(compute_mode)
 
     # Control thinking for SmolLM3
-    system_prompt = answer_prompt()
+    system_prompt = generate_prompt()
     if hasattr(chat_model, "model_id") and not think:
         system_prompt = f"/no_think\n{system_prompt}"
 
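The /no_think prefix is applied only for local Hugging Face models, which are detected by the presence of a model_id attribute (remote OpenAI chat models lack it). A small sketch of that gate; the DummyLocal class and model id are illustrative assumptions, not part of the codebase:

think = False
system_prompt = "You are a helpful RAG chatbot."

class DummyLocal:
    # ChatHuggingFace instances expose model_id; remote models do not
    model_id = "HuggingFaceTB/SmolLM3-3B"  # assumed id, for illustration only

chat_model = DummyLocal()
if hasattr(chat_model, "model_id") and not think:
    # SmolLM3-style flag that disables thinking mode in the chat template
    system_prompt = f"/no_think\n{system_prompt}"

print(system_prompt)  # -> "/no_think\nYou are a helpful RAG chatbot."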
prompts.py
CHANGED
@@ -3,7 +3,7 @@ from util import get_sources, get_start_end_months
 import re
 
 
-def retrieve_prompt(compute_mode):
+def query_prompt(compute_mode):
     """Return system prompt for query step
 
     Args:

@@ -13,11 +13,11 @@ def retrieve_prompt(compute_mode):
     # Get start and end months from database
     start, end = get_start_end_months(get_sources())
 
-    retrieve_prompt = (
+    query_prompt = (
         f"Today Date: {date.today()}."
         "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
         "Do not ask the user for more information, but retrieve emails from the R-help mailing list archives."
-        # gpt-4o-mini
+        # gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval."
         f"The emails available for retrieval are from {start} to {end}."
         "Write a search query based on the user's question, but do not answer the question just yet."
         "For questions about differences or comparison between X and Y, retrieve emails about X and Y."

@@ -25,19 +25,20 @@ def retrieve_prompt(compute_mode):
         "For specific questions, use retrieve_emails(search_query=<specific topic>)."
         "For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
         "For questions about months, use 3-letter abbreviations (Jan..Dec) for the 'month' argument."
-        "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
+        "Even if retrieved emails are already available, you should retrieve *more* emails to answer the most recent question."  # Qwen
+        # "If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
     )
     # A sanity check that we don't have unassigned variables
     # (this causes KeyError in parsing by ToolCallingLLM)
-    matches = re.findall(r"\{.*?\}", " ".join(retrieve_prompt))
+    matches = re.findall(r"\{.*?\}", " ".join(query_prompt))
     if matches:
         raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
-    return retrieve_prompt
+    return query_prompt
 
 
-def answer_prompt(with_tools=True):
+def generate_prompt(with_tools=True, think=True):
     """Return system prompt for generate step"""
-    answer_prompt = (
+    generate_prompt = (
         f"Today Date: {date.today()}."
         "You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
         "Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query."

@@ -45,17 +46,20 @@ def answer_prompt(with_tools=True):
         "Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails."
         "Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails."
         "Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails."
-        "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
+        # "Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
+        "Summarize the content of the emails rather than copying the headers."  # Qwen
         "Include inline citations (email senders and dates) in your response."
         "Only answer general questions about R if the answer is given in the retrieved emails."
         "Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails."
     )
     if with_tools:
-        answer_prompt += "Use answer_with_citations to provide the complete answer and all citations used."
-    matches = re.findall(r"\{.*?\}", " ".join(answer_prompt))
+        generate_prompt += "Use answer_with_citations to provide the complete answer and all citations used."
+    if not think:
+        generate_prompt += "/no_think"
+    matches = re.findall(r"\{.*?\}", " ".join(generate_prompt))
     if matches:
         raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
-    return answer_prompt
+    return generate_prompt
 
 
 # Prompt template for SmolLM3 with tools
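These prompts are built by adjacent-string concatenation, so a {placeholder} left in a plain (non-f) string would survive into the prompt and later cause a KeyError when ToolCallingLLM parses its template; the re.findall check catches that early. A minimal sketch of the check in isolation, simplified to run findall on the string directly (the prompt text here is made up):

import re

# Adjacent string literals concatenate into one str; the second literal
# is missing its f-prefix, so {topic} is never substituted
prompt = (
    "You are a helpful RAG chatbot."
    "Write a search query about {topic}."
)
matches = re.findall(r"\{.*?\}", prompt)
if matches:
    raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")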
requirements.txt
CHANGED
@@ -13,7 +13,7 @@ torch==2.5.1
 # Gemma 3: transformers>=4.50
 # Gemma 3 with transformers==4.54.0 gives:
 # ValueError: Max cache length is not consistent across layers
-transformers==4.54.0
+transformers==4.50.0
 # Commented because we have local modifications
 #tool-calling-llm==0.1.2
 bm25s==0.2.12
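The pin keeps transformers inside the window the comments describe: new enough for Gemma 3 (>=4.50) but before the 4.54.0 cache error. A quick generic check that the installed version matches the pin (not part of the repo):

import transformers

# Gemma 3 needs >=4.50, but 4.54.0 raises "Max cache length is not
# consistent across layers"; stay on the known-good pinned version
assert transformers.__version__ == "4.50.0", transformers.__version__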