jennifee aslan-ng committed on
Commit
8cd4c71
·
verified ·
1 Parent(s): 0fed93a

Update app.py (#5)

Browse files

- Update app.py (d03a66cdb5555dafa34555e5f7802e5c03892b1a)


Co-authored-by: Aslan Noorghasemi <[email protected]>

Files changed (1) hide show
  1. app.py +2 -90
app.py CHANGED
@@ -1,5 +1,3 @@
1
- # Import dependencies
2
-
3
  from langchain.prompts import PromptTemplate
4
  from langchain.chains import LLMChain
5
  from pyvis.network import Network
@@ -33,17 +31,14 @@ from peft import PeftModel, LoraConfig, get_peft_model, TaskType
33
 
34
  # Setup
35
 
36
- #token_public = ""
37
- #login(token)
38
-
39
  REPO_ID_NEAR_FIELD_RAW = "milistu/AMAZON-Products-2023"
40
  REPO_ID_NEAR_FIELD = "aslan-ng/amazon_products_2023"
41
  REPO_ID_FAR_FIELD = "aslan-ng/amazon_products_2025"
 
42
 
43
  def product_quality_score(average_rating: float, rating_number: int):
44
  """
45
  Bayesian Average (Amazon-style)
46
-
47
  Args:
48
  average_rating: product's average rating
49
  rating_number: number of reviews
@@ -54,10 +49,6 @@ def product_quality_score(average_rating: float, rating_number: int):
54
  return C # fallback to global mean
55
  return (rating_number / (rating_number + m)) * average_rating + (m / (rating_number + m)) * C
56
 
57
- # Example
58
- print("Product 1: ", product_quality_score(average_rating=4.25, rating_number=10000))
59
- print("Product 2: ", product_quality_score(average_rating=5.0, rating_number=1))
60
-
61
  def load_near_field_raw_from_huggingface():
62
  """
63
  Load the raw near-field dataset from HuggingFace.
@@ -119,53 +110,7 @@ def load_near_field_from_huggingface():
119
  ds = load_dataset(REPO_ID_NEAR_FIELD, split="train")
120
  return ds.to_pandas()
121
 
122
- def save_near_field_to_huggingface():
123
- """
124
- Save the near-field dataset to HuggingFace.
125
- """
126
- df = load_near_field_raw_from_huggingface()
127
- ds = Dataset.from_pandas(df)
128
- ds.push_to_hub(REPO_ID_NEAR_FIELD)
129
- print(f"✅ Pushed {len(ds)} rows to {REPO_ID_NEAR_FIELD}")
130
-
131
- #save_near_field_to_huggingface() # Run it once
132
  dataset_near_field = load_near_field_from_huggingface()
133
- print("Near-Field Length: ", len(dataset_near_field))
134
- #print(dataset_near_field.head())
135
-
136
- def load_far_field_from_sheet():
137
- """
138
- Load the far-field dataset from Google Sheets.
139
- """
140
- auth.authenticate_user()
141
- from google.auth import default
142
- COLS = ["title", "description", "average_rating", "rating_number"]
143
- categories = ["Home & Kitchen", "Beauty & Personal Care", "Sports & Outdoors", "Clothing, Shoes & Jewelry", "Industrial & Scientific",
144
- "Appliances", "Arts, Crafts & Sewing", "Electronics"]
145
- sh = gspread.authorize(default()[0]).open_by_key(SHEET_ID_FAR_FIELD)
146
- frames = []
147
- for ws in sh.worksheets(): # iterate ALL sheets/tabs
148
- rows = ws.get_all_records()
149
- if not rows:
150
- continue
151
- df = pd.DataFrame(rows)
152
- # Keep only the exact columns you want
153
- df = df[COLS].copy()
154
- # Add the tab name as main_category
155
- df["main_category"] = ws.title
156
- frames.append(df)
157
- df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=COLS + ["main_category"])
158
-
159
- # Add product quality score column
160
- def _safe_pqs(row):
161
- ar, n = row["average_rating"], row["rating_number"]
162
- if pd.notna(ar) and pd.notna(n):
163
- return product_quality_score(ar, n)
164
- return float("nan")
165
-
166
- df["product_quality_score"] = df.apply(_safe_pqs, axis=1)
167
-
168
- return df
169
 
170
  def load_far_field_from_huggingface():
171
  """
@@ -174,19 +119,7 @@ def load_far_field_from_huggingface():
174
  ds = load_dataset(REPO_ID_FAR_FIELD, split="train")
175
  return ds.to_pandas()
176
 
177
- def save_far_field_to_huggingface():
178
- """
179
- Save the far-field dataset to HuggingFace.
180
- """
181
- df = load_far_field_from_sheet()
182
- ds = Dataset.from_pandas(df)
183
- ds.push_to_hub(REPO_ID_FAR_FIELD)
184
- print(f"✅ Pushed {len(ds)} rows to {REPO_ID_FAR_FIELD}")
185
-
186
- #save_far_field_to_huggingface() # Run it once
187
  dataset_far_field = load_far_field_from_huggingface()
188
- print("Far-Field Length: ",len(dataset_far_field))
189
- #print(dataset_far_field.head())
190
 
191
  def product_score(product_quality_score: float, fuzzy_score: float):
192
  """
@@ -194,10 +127,6 @@ def product_score(product_quality_score: float, fuzzy_score: float):
194
  """
195
  return math.sqrt(product_quality_score * fuzzy_score)
196
 
197
- # Example
198
- print("Product 1: ", product_score(product_quality_score=3.2, fuzzy_score=100))
199
- print("Product 2: ", product_score(product_quality_score=4.5, fuzzy_score=70))
200
-
201
  def query_near_field(input: str, top_k: int=1):
202
  """
203
  Return top_k fuzzy matches for query against dataset titles as a pandas DataFrame.
@@ -234,11 +163,6 @@ def query_near_field(input: str, top_k: int=1):
234
  .reset_index(drop=True)
235
  )
236
 
237
- # Example
238
- near_field_result = query_near_field("water bottle", top_k=5)
239
- #print(near_field_result.head())
240
- print("Example: ", near_field_result.iloc[0]["title"])
241
-
242
  def query_far_field(input: str, top_k: int):
243
  """
244
  Return top_k random elements from the far_field dataset as a pandas DataFrame.
@@ -267,12 +191,6 @@ def query_far_field(input: str, top_k: int):
267
 
268
  return sampled
269
 
270
- # Example usage
271
- far_field_result = query_far_field("water bottle", top_k=3)
272
- #print(far_field_result)
273
- print("Top result title:", far_field_result.iloc[0]["title"])
274
- #print("Top result title:", far_field_result)
275
-
276
  def split_near_and_far_fields(total_examples: int, near_far_ratio: float = 0.5):
277
  """
278
  Split the examples between near and far field.
@@ -290,9 +208,6 @@ def split_near_and_far_fields(total_examples: int, near_far_ratio: float = 0.5):
290
 
291
  return near_field_examples, far_field_examples
292
 
293
- # Example
294
- print("Example: ", split_near_and_far_fields(total_examples=100, near_far_ratio=0.3)) # Expected: (30, 70)
295
-
296
  def query(input: str, total_examples: int, near_far_ratio: float = 0.5):
297
  near_field_examples, far_field_examples = split_near_and_far_fields(total_examples, near_far_ratio)
298
  far_field_result = query_far_field(input, far_field_examples)
@@ -345,9 +260,7 @@ You are a product analyst. You'll receive product description as input, and extr
345
  Product functionality refers to what the product does: its features, technical capabilities, and performance characteristics. It answers the question: “What can this product do?”
346
  Product value refers to the benefit the customer gains from using the product: how it improves their life, solves their problem, or helps them achieve goals. It answers the question: “Why does this matter to the customer?”
347
  Do **not** duplicate an item in both lists. Keep **functionalities** as concrete features. Keep **values** as clear user benefits.
348
-
349
  Your Output is a dictionary. Here is the format:
350
-
351
  # Your Input:
352
  <product_description>
353
  # Your Output:
@@ -363,7 +276,6 @@ Your Output is a dictionary. Here is the format:
363
  ...
364
  ]
365
  }
366
-
367
  Don't return anything out of the output format.
368
  """
369
 
@@ -595,4 +507,4 @@ demo = gr.Interface(
595
  """
596
  )
597
 
598
- demo.launch(quiet=True)
 
 
 
1
  from langchain.prompts import PromptTemplate
2
  from langchain.chains import LLMChain
3
  from pyvis.network import Network
 
31
 
32
  # Setup
33
 
 
 
 
34
  REPO_ID_NEAR_FIELD_RAW = "milistu/AMAZON-Products-2023"
35
  REPO_ID_NEAR_FIELD = "aslan-ng/amazon_products_2023"
36
  REPO_ID_FAR_FIELD = "aslan-ng/amazon_products_2025"
37
+ REPO_ID_LORA_GREEN_PATENTS = "aslan-ng/lora-green-patents"
38
 
39
  def product_quality_score(average_rating: float, rating_number: int):
40
  """
41
  Bayesian Average (Amazon-style)
 
42
  Args:
43
  average_rating: product's average rating
44
  rating_number: number of reviews
 
49
  return C # fallback to global mean
50
  return (rating_number / (rating_number + m)) * average_rating + (m / (rating_number + m)) * C
51
 
 
 
 
 
52
  def load_near_field_raw_from_huggingface():
53
  """
54
  Load the raw near-field dataset from HuggingFace.
 
110
  ds = load_dataset(REPO_ID_NEAR_FIELD, split="train")
111
  return ds.to_pandas()
112
 
 
 
 
 
 
 
 
 
 
 
113
  dataset_near_field = load_near_field_from_huggingface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
  def load_far_field_from_huggingface():
116
  """
 
119
  ds = load_dataset(REPO_ID_FAR_FIELD, split="train")
120
  return ds.to_pandas()
121
 
 
 
 
 
 
 
 
 
 
 
122
  dataset_far_field = load_far_field_from_huggingface()
 
 
123
 
124
  def product_score(product_quality_score: float, fuzzy_score: float):
125
  """
 
127
  """
128
  return math.sqrt(product_quality_score * fuzzy_score)
129
 
 
 
 
 
130
  def query_near_field(input: str, top_k: int=1):
131
  """
132
  Return top_k fuzzy matches for query against dataset titles as a pandas DataFrame.
 
163
  .reset_index(drop=True)
164
  )
165
 
 
 
 
 
 
166
  def query_far_field(input: str, top_k: int):
167
  """
168
  Return top_k random elements from the far_field dataset as a pandas DataFrame.
 
191
 
192
  return sampled
193
 
 
 
 
 
 
 
194
  def split_near_and_far_fields(total_examples: int, near_far_ratio: float = 0.5):
195
  """
196
  Split the examples between near and far field.
 
208
 
209
  return near_field_examples, far_field_examples
210
 
 
 
 
211
  def query(input: str, total_examples: int, near_far_ratio: float = 0.5):
212
  near_field_examples, far_field_examples = split_near_and_far_fields(total_examples, near_far_ratio)
213
  far_field_result = query_far_field(input, far_field_examples)
 
260
  Product functionality refers to what the product does: its features, technical capabilities, and performance characteristics. It answers the question: “What can this product do?”
261
  Product value refers to the benefit the customer gains from using the product: how it improves their life, solves their problem, or helps them achieve goals. It answers the question: “Why does this matter to the customer?”
262
  Do **not** duplicate an item in both lists. Keep **functionalities** as concrete features. Keep **values** as clear user benefits.
 
263
  Your Output is a dictionary. Here is the format:
 
264
  # Your Input:
265
  <product_description>
266
  # Your Output:
 
276
  ...
277
  ]
278
  }
 
279
  Don't return anything out of the output format.
280
  """
281
 
 
507
  """
508
  )
509
 
510
+ demo.launch(quiet=True)