Spaces:

jennifee
/

Product_First_Principles_Decomposition

Sleeping

App Files Files Community

jennifee

aslan-ng committed on Oct 6, 2025

Commit

8cd4c71

verified ·

1 Parent(s): 0fed93a

Update app.py (#5)

Browse files

- Update app.py (d03a66cdb5555dafa34555e5f7802e5c03892b1a)

Co-authored-by: Aslan Noorghasemi <[email protected]>

Files changed (1) hide show

app.py +2 -90

app.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# Import dependencies
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from pyvis.network import Network
@@ -33,17 +31,14 @@ from peft import PeftModel, LoraConfig, get_peft_model, TaskType
 # Setup
-#token_public = ""
-#login(token)
 REPO_ID_NEAR_FIELD_RAW = "milistu/AMAZON-Products-2023"
 REPO_ID_NEAR_FIELD = "aslan-ng/amazon_products_2023"
 REPO_ID_FAR_FIELD = "aslan-ng/amazon_products_2025"
 def product_quality_score(average_rating: float, rating_number: int):
     """
     Bayesian Average (Amazon-style)
     Args:
       average_rating: product's average rating
       rating_number: number of reviews
@@ -54,10 +49,6 @@ def product_quality_score(average_rating: float, rating_number: int):
         return C  # fallback to global mean
     return (rating_number / (rating_number + m)) * average_rating + (m / (rating_number + m)) * C
-# Example
-print("Product 1: ", product_quality_score(average_rating=4.25, rating_number=10000))
-print("Product 2: ", product_quality_score(average_rating=5.0, rating_number=1))
 def load_near_field_raw_from_huggingface():
     """
     Load the raw near-field dataset from HuggingFace.
@@ -119,53 +110,7 @@ def load_near_field_from_huggingface():
     ds = load_dataset(REPO_ID_NEAR_FIELD, split="train")
     return ds.to_pandas()
-def save_near_field_to_huggingface():
-    """
-    Save the near-field dataset to HuggingFace.
-    """
-    df = load_near_field_raw_from_huggingface()
-    ds = Dataset.from_pandas(df)
-    ds.push_to_hub(REPO_ID_NEAR_FIELD)
-    print(f"✅ Pushed {len(ds)} rows to {REPO_ID_NEAR_FIELD}")
-#save_near_field_to_huggingface() # Run it once
 dataset_near_field = load_near_field_from_huggingface()
-print("Near-Field Length: ", len(dataset_near_field))
-#print(dataset_near_field.head())
-def load_far_field_from_sheet():
-    """
-    Load the far-field dataset from Google Sheets.
-    """
-    auth.authenticate_user()
-    from google.auth import default
-    COLS = ["title", "description", "average_rating", "rating_number"]
-    categories = ["Home & Kitchen", "Beauty & Personal Care", "Sports & Outdoors", "Clothing, Shoes & Jewelry", "Industrial & Scientific",
-                  "Appliances", "Arts, Crafts & Sewing", "Electronics"]
-    sh = gspread.authorize(default()[0]).open_by_key(SHEET_ID_FAR_FIELD)
-    frames = []
-    for ws in sh.worksheets():  # iterate ALL sheets/tabs
-        rows = ws.get_all_records()
-        if not rows:
-            continue
-        df = pd.DataFrame(rows)
-        # Keep only the exact columns you want
-        df = df[COLS].copy()
-        # Add the tab name as main_category
-        df["main_category"] = ws.title
-        frames.append(df)
-    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(columns=COLS + ["main_category"])
-    # Add product quality score column
-    def _safe_pqs(row):
-        ar, n = row["average_rating"], row["rating_number"]
-        if pd.notna(ar) and pd.notna(n):
-            return product_quality_score(ar, n)
-        return float("nan")
-    df["product_quality_score"] = df.apply(_safe_pqs, axis=1)
-    return df
 def load_far_field_from_huggingface():
     """
@@ -174,19 +119,7 @@ def load_far_field_from_huggingface():
     ds = load_dataset(REPO_ID_FAR_FIELD, split="train")
     return ds.to_pandas()
-def save_far_field_to_huggingface():
-    """
-    Save the far-field dataset to HuggingFace.
-    """
-    df = load_far_field_from_sheet()
-    ds = Dataset.from_pandas(df)
-    ds.push_to_hub(REPO_ID_FAR_FIELD)
-    print(f"✅ Pushed {len(ds)} rows to {REPO_ID_FAR_FIELD}")
-#save_far_field_to_huggingface() # Run it once
 dataset_far_field = load_far_field_from_huggingface()
-print("Far-Field Length: ",len(dataset_far_field))
-#print(dataset_far_field.head())
 def product_score(product_quality_score: float, fuzzy_score: float):
     """
@@ -194,10 +127,6 @@ def product_score(product_quality_score: float, fuzzy_score: float):
     """
     return math.sqrt(product_quality_score * fuzzy_score)
-# Example
-print("Product 1: ", product_score(product_quality_score=3.2, fuzzy_score=100))
-print("Product 2: ", product_score(product_quality_score=4.5, fuzzy_score=70))
 def query_near_field(input: str, top_k: int=1):
     """
     Return top_k fuzzy matches for query against dataset titles as a pandas DataFrame.
@@ -234,11 +163,6 @@ def query_near_field(input: str, top_k: int=1):
         .reset_index(drop=True)
     )
-# Example
-near_field_result = query_near_field("water bottle", top_k=5)
-#print(near_field_result.head())
-print("Example: ", near_field_result.iloc[0]["title"])
 def query_far_field(input: str, top_k: int):
     """
     Return top_k random elements from the far_field dataset as a pandas DataFrame.
@@ -267,12 +191,6 @@ def query_far_field(input: str, top_k: int):
     return sampled
-# Example usage
-far_field_result = query_far_field("water bottle", top_k=3)
-#print(far_field_result)
-print("Top result title:", far_field_result.iloc[0]["title"])
-#print("Top result title:", far_field_result)
 def split_near_and_far_fields(total_examples: int, near_far_ratio: float = 0.5):
     """
     Split the examples between near and far field.
@@ -290,9 +208,6 @@ def split_near_and_far_fields(total_examples: int, near_far_ratio: float = 0.5):
     return near_field_examples, far_field_examples
-# Example
-print("Example: ", split_near_and_far_fields(total_examples=100, near_far_ratio=0.3)) # Expected: (30, 70)
 def query(input: str, total_examples: int, near_far_ratio: float = 0.5):
     near_field_examples, far_field_examples = split_near_and_far_fields(total_examples, near_far_ratio)
     far_field_result = query_far_field(input, far_field_examples)
@@ -345,9 +260,7 @@ You are a product analyst. You'll receive product description as input, and extr
 Product functionality refers to what the product does: its features, technical capabilities, and performance characteristics. It answers the question: “What can this product do?”
 Product value refers to the benefit the customer gains from using the product: how it improves their life, solves their problem, or helps them achieve goals. It answers the question: “Why does this matter to the customer?”
 Do **not** duplicate an item in both lists. Keep **functionalities** as concrete features. Keep **values** as clear user benefits.
 Your Output is a dictionary. Here is the format:
 # Your Input:
   <product_description>
 # Your Output:
@@ -363,7 +276,6 @@ Your Output is a dictionary. Here is the format:
     ...
   ]
 }
 Don't return anything out of the output format.
 """
@@ -595,4 +507,4 @@ demo = gr.Interface(
     """
 )
-demo.launch(quiet=True)

 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from pyvis.network import Network
 # Setup
 REPO_ID_NEAR_FIELD_RAW = "milistu/AMAZON-Products-2023"
 REPO_ID_NEAR_FIELD = "aslan-ng/amazon_products_2023"
 REPO_ID_FAR_FIELD = "aslan-ng/amazon_products_2025"
+REPO_ID_LORA_GREEN_PATENTS = "aslan-ng/lora-green-patents"
 def product_quality_score(average_rating: float, rating_number: int):
     """
     Bayesian Average (Amazon-style)
     Args:
       average_rating: product's average rating
       rating_number: number of reviews
         return C  # fallback to global mean
     return (rating_number / (rating_number + m)) * average_rating + (m / (rating_number + m)) * C
 def load_near_field_raw_from_huggingface():
     """
     Load the raw near-field dataset from HuggingFace.
     ds = load_dataset(REPO_ID_NEAR_FIELD, split="train")
     return ds.to_pandas()
 dataset_near_field = load_near_field_from_huggingface()
 def load_far_field_from_huggingface():
     """
     ds = load_dataset(REPO_ID_FAR_FIELD, split="train")
     return ds.to_pandas()
 dataset_far_field = load_far_field_from_huggingface()
 def product_score(product_quality_score: float, fuzzy_score: float):
     """
     """
     return math.sqrt(product_quality_score * fuzzy_score)
 def query_near_field(input: str, top_k: int=1):
     """
     Return top_k fuzzy matches for query against dataset titles as a pandas DataFrame.
         .reset_index(drop=True)
     )
 def query_far_field(input: str, top_k: int):
     """
     Return top_k random elements from the far_field dataset as a pandas DataFrame.
     return sampled
 def split_near_and_far_fields(total_examples: int, near_far_ratio: float = 0.5):
     """
     Split the examples between near and far field.
     return near_field_examples, far_field_examples
 def query(input: str, total_examples: int, near_far_ratio: float = 0.5):
     near_field_examples, far_field_examples = split_near_and_far_fields(total_examples, near_far_ratio)
     far_field_result = query_far_field(input, far_field_examples)
 Product functionality refers to what the product does: its features, technical capabilities, and performance characteristics. It answers the question: “What can this product do?”
 Product value refers to the benefit the customer gains from using the product: how it improves their life, solves their problem, or helps them achieve goals. It answers the question: “Why does this matter to the customer?”
 Do **not** duplicate an item in both lists. Keep **functionalities** as concrete features. Keep **values** as clear user benefits.
 Your Output is a dictionary. Here is the format:
 # Your Input:
   <product_description>
 # Your Output:
     ...
   ]
 }
 Don't return anything out of the output format.
 """
     """
 )
+demo.launch(quiet=True)