oceddyyy committed on
Commit
b0719dc
·
verified ·
1 Parent(s): 3a41f27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -29
app.py CHANGED
@@ -1,9 +1,4 @@
1
  import os
2
- os.environ["HF_HOME"] = "/tmp/.cache"
3
- os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
4
- os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
5
- os.makedirs("/tmp/.cache", exist_ok=True)
6
-
7
  import json
8
  from sentence_transformers import SentenceTransformer
9
  from sklearn.metrics.pairwise import cosine_similarity
@@ -12,14 +7,22 @@ from huggingface_hub import upload_file, hf_hub_download, InferenceClient
12
  from flask import Flask, request, jsonify
13
  import time
14
 
15
- embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
16
 
 
 
 
 
 
 
 
 
17
  token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
18
  inference_client = InferenceClient(
19
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
20
  token=token
21
  )
22
 
 
23
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
24
  DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
25
  with open(DATASET_PATH, "r") as f:
@@ -29,14 +32,17 @@ questions = [item["question"] for item in dataset]
29
  answers = [item["answer"] for item in dataset]
30
  question_embeddings = embedding_model.encode(questions, convert_to_tensor=True)
31
 
32
- chat_history = []
33
  feedback_data = []
34
  feedback_questions = []
35
  feedback_embeddings = None
36
- dev_mode = {"enabled": False}
37
-
38
  feedback_path = "/tmp/outputs/feedback.json"
39
- os.makedirs("/tmp/outputs", exist_ok=True)
 
 
 
 
40
 
41
  try:
42
  hf_token = os.getenv("NEW_PUP_AI_Project")
@@ -60,28 +66,28 @@ except Exception as e:
60
  feedback_data = []
61
 
62
 
63
- def upload_feedback_to_hf():
 
64
  hf_token = os.getenv("NEW_PUP_AI_Project")
65
  if not hf_token:
66
  raise ValueError("Hugging Face token not found in environment variables!")
67
 
68
  try:
69
  upload_file(
70
- path_or_fileobj=feedback_path,
71
- path_in_repo="feedback.json",
72
  repo_id="oceddyyy/University_Inquiries_Feedback",
73
  repo_type="dataset",
74
  token=hf_token
75
  )
76
- print("Feedback uploaded to Hugging Face successfully.")
77
  except Exception as e:
78
- print(f"Error uploading feedback to HF: {e}")
79
 
80
 
81
  def chatbot_response(query, dev_mode_flag):
82
  query_embedding = embedding_model.encode([query], convert_to_tensor=True)
83
 
84
- # Feedback check
85
  if feedback_embeddings is not None:
86
  feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
87
  best_idx = int(np.argmax(feedback_scores))
@@ -97,7 +103,6 @@ def chatbot_response(query, dev_mode_flag):
97
  if best_score >= dynamic_threshold:
98
  return matched_feedback["response"], "Feedback", 0.0
99
 
100
- # Handbook retrieval
101
  similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
102
  top_k = 3
103
  top_k_indices = np.argsort(similarity_scores)[-top_k:][::-1]
@@ -113,6 +118,7 @@ def chatbot_response(query, dev_mode_flag):
113
  context = ""
114
  for i, item in enumerate(top_k_items):
115
  context += f"Relevant info #{i+1} (score: {top_k_scores[i]:.2f}):\n\"{item.get('answer', '')}\"\n\n"
 
116
  prompt = (
117
  f"You are an expert university assistant. "
118
  f"A student asked: \"{query}\"\n"
@@ -133,14 +139,13 @@ def chatbot_response(query, dev_mode_flag):
133
  llm_response = inference_client.chat_completion(
134
  messages=conversation,
135
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
136
- max_tokens=200,
137
  temperature=0.7
138
  )
139
  if isinstance(llm_response, dict) and "choices" in llm_response:
140
  response = llm_response["choices"][0]["message"]["content"]
141
  elif hasattr(llm_response, "generated_text"):
142
  response = llm_response.generated_text
143
-
144
  else:
145
  llm_response = inference_client.text_generation(
146
  prompt,
@@ -175,7 +180,8 @@ def chatbot_response(query, dev_mode_flag):
175
  return response.strip(), matched_source, 0.0
176
 
177
 
178
- def record_feedback(feedback, query, response):
 
179
  global feedback_embeddings, feedback_questions
180
  matched = False
181
  new_embedding = embedding_model.encode([query], convert_to_tensor=True)
@@ -191,8 +197,8 @@ def record_feedback(feedback, query, response):
191
 
192
  if not matched:
193
  entry = {
194
- "question": query,
195
- "response": response,
196
  "feedback": feedback,
197
  "upvotes": 1 if feedback == "positive" else 0,
198
  "downvotes": 1 if feedback == "negative" else 0
@@ -206,9 +212,30 @@ def record_feedback(feedback, query, response):
206
  if feedback_questions:
207
  feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
208
 
209
- upload_feedback_to_hf()
210
 
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  app = Flask(__name__)
213
 
214
  @app.route("/api/chat", methods=["POST"])
@@ -220,21 +247,19 @@ def chat():
220
  response, source, elapsed = chatbot_response(query, dev)
221
  return jsonify({"response": response, "source": source, "response_time": elapsed})
222
 
223
-
224
  @app.route("/api/feedback", methods=["POST"])
225
  def feedback():
226
  data = request.json
227
  query = data.get("query", "")
228
  response = data.get("response", "")
229
  feedback_type = data.get("feedback", "")
230
- record_feedback(feedback_type, query, response)
 
231
  return jsonify({"status": "success"})
232
 
233
-
234
  @app.route("/", methods=["GET"])
235
  def index():
236
- return "University Inquiries AI Chatbot API. Use POST /chat or /feedback.", 200
237
-
238
 
239
  if __name__ == "__main__":
240
- app.run(host="0.0.0.0", port=7861)
 
1
  import os
 
 
 
 
 
2
  import json
3
  from sentence_transformers import SentenceTransformer
4
  from sklearn.metrics.pairwise import cosine_similarity
 
7
  from flask import Flask, request, jsonify
8
  import time
9
 
 
10
 
11
+ os.environ["HF_HOME"] = "/tmp/.cache"
12
+ os.environ["HF_DATASETS_CACHE"] = "/tmp/.cache"
13
+ os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp/.cache"
14
+ os.makedirs("/tmp/.cache", exist_ok=True)
15
+ os.makedirs("/tmp/outputs", exist_ok=True)
16
+
17
+
18
+ embedding_model = SentenceTransformer('paraphrase-mpnet-base-v2')
19
  token = os.getenv("HF_TOKEN") or os.getenv("NEW_PUP_AI_Project")
20
  inference_client = InferenceClient(
21
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
22
  token=token
23
  )
24
 
25
+
26
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
27
  DATASET_PATH = os.path.join(BASE_DIR, "dataset.json")
28
  with open(DATASET_PATH, "r") as f:
 
32
  answers = [item["answer"] for item in dataset]
33
  question_embeddings = embedding_model.encode(questions, convert_to_tensor=True)
34
 
35
+
36
  feedback_data = []
37
  feedback_questions = []
38
  feedback_embeddings = None
39
+ dev_mode = {"enabled": False}
 
40
  feedback_path = "/tmp/outputs/feedback.json"
41
+ COMMENTS_PATH = "/tmp/outputs/Comments.json"
42
+
43
+ if not os.path.exists(COMMENTS_PATH):
44
+ with open(COMMENTS_PATH, "w") as f:
45
+ json.dump([], f, indent=4)
46
 
47
  try:
48
  hf_token = os.getenv("NEW_PUP_AI_Project")
 
66
  feedback_data = []
67
 
68
 
69
+ def upload_file_to_hf(local_path, remote_filename):
70
+ """Helper to upload any file to Hugging Face dataset repo."""
71
  hf_token = os.getenv("NEW_PUP_AI_Project")
72
  if not hf_token:
73
  raise ValueError("Hugging Face token not found in environment variables!")
74
 
75
  try:
76
  upload_file(
77
+ path_or_fileobj=local_path,
78
+ path_in_repo=remote_filename,
79
  repo_id="oceddyyy/University_Inquiries_Feedback",
80
  repo_type="dataset",
81
  token=hf_token
82
  )
83
+ print(f"{remote_filename} uploaded to Hugging Face successfully.")
84
  except Exception as e:
85
+ print(f"Error uploading {remote_filename} to HF: {e}")
86
 
87
 
88
  def chatbot_response(query, dev_mode_flag):
89
  query_embedding = embedding_model.encode([query], convert_to_tensor=True)
90
 
 
91
  if feedback_embeddings is not None:
92
  feedback_scores = cosine_similarity(query_embedding.cpu().numpy(), feedback_embeddings.cpu().numpy())[0]
93
  best_idx = int(np.argmax(feedback_scores))
 
103
  if best_score >= dynamic_threshold:
104
  return matched_feedback["response"], "Feedback", 0.0
105
 
 
106
  similarity_scores = cosine_similarity(query_embedding.cpu().numpy(), question_embeddings.cpu().numpy())[0]
107
  top_k = 3
108
  top_k_indices = np.argsort(similarity_scores)[-top_k:][::-1]
 
118
  context = ""
119
  for i, item in enumerate(top_k_items):
120
  context += f"Relevant info #{i+1} (score: {top_k_scores[i]:.2f}):\n\"{item.get('answer', '')}\"\n\n"
121
+
122
  prompt = (
123
  f"You are an expert university assistant. "
124
  f"A student asked: \"{query}\"\n"
 
139
  llm_response = inference_client.chat_completion(
140
  messages=conversation,
141
  model="mistralai/Mixtral-8x7B-Instruct-v0.1",
142
+ max_tokens=200,
143
  temperature=0.7
144
  )
145
  if isinstance(llm_response, dict) and "choices" in llm_response:
146
  response = llm_response["choices"][0]["message"]["content"]
147
  elif hasattr(llm_response, "generated_text"):
148
  response = llm_response.generated_text
 
149
  else:
150
  llm_response = inference_client.text_generation(
151
  prompt,
 
180
  return response.strip(), matched_source, 0.0
181
 
182
 
183
+ def record_feedback(feedback, query, response, comment=None):
184
+ """Records user feedback and optional comment."""
185
  global feedback_embeddings, feedback_questions
186
  matched = False
187
  new_embedding = embedding_model.encode([query], convert_to_tensor=True)
 
197
 
198
  if not matched:
199
  entry = {
200
+ "question": query,
201
+ "response": response,
202
  "feedback": feedback,
203
  "upvotes": 1 if feedback == "positive" else 0,
204
  "downvotes": 1 if feedback == "negative" else 0
 
212
  if feedback_questions:
213
  feedback_embeddings = embedding_model.encode(feedback_questions, convert_to_tensor=True)
214
 
215
+ upload_file_to_hf(feedback_path, "feedback.json")
216
 
217
 
218
+ if comment and comment.strip():
219
+ try:
220
+ with open(COMMENTS_PATH, "r") as f:
221
+ comments_list = json.load(f)
222
+ except json.JSONDecodeError:
223
+ comments_list = []
224
+
225
+ comment_entry = {
226
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
227
+ "question": query,
228
+ "response": response,
229
+ "feedback": feedback,
230
+ "comment": comment.strip()
231
+ }
232
+ comments_list.append(comment_entry)
233
+
234
+ with open(COMMENTS_PATH, "w") as f:
235
+ json.dump(comments_list, f, indent=4)
236
+
237
+ upload_file_to_hf(COMMENTS_PATH, "Comments.json")
238
+
239
  app = Flask(__name__)
240
 
241
  @app.route("/api/chat", methods=["POST"])
 
247
  response, source, elapsed = chatbot_response(query, dev)
248
  return jsonify({"response": response, "source": source, "response_time": elapsed})
249
 
 
250
  @app.route("/api/feedback", methods=["POST"])
251
  def feedback():
252
  data = request.json
253
  query = data.get("query", "")
254
  response = data.get("response", "")
255
  feedback_type = data.get("feedback", "")
256
+ comment = data.get("comment", None)
257
+ record_feedback(feedback_type, query, response, comment)
258
  return jsonify({"status": "success"})
259
 
 
260
  @app.route("/", methods=["GET"])
261
  def index():
262
+ return "University Inquiries AI Chatbot API. Use POST /api/chat or /api/feedback.", 200
 
263
 
264
  if __name__ == "__main__":
265
+ app.run(host="0.0.0.0", port=7861)