Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,8 +33,6 @@ BASE_SPIRITS = {
|
|
| 33 |
"vermouth": [r"\bvermouth\b"],
|
| 34 |
"other": [r"\btriple sec\b", r"\bliqueur\b", r"\bcointreau\b", r"\baperol\b", r"\bcampari\b"],
|
| 35 |
}
|
| 36 |
-
BASE_OPTIONS = list(BASE_SPIRITS.keys())
|
| 37 |
-
|
| 38 |
FLAVORS = {
|
| 39 |
"citrus": [r"lime", r"lemon", r"grapefruit", r"orange", r"\bcitrus\b"],
|
| 40 |
"sweet": [r"simple syrup", r"\bsugar\b", r"\bhoney\b", r"\bagave\b", r"\bmaple\b", r"\bgrenadine\b", r"\bvanilla\b", r"\bsweet\b"],
|
|
@@ -84,7 +82,6 @@ def _split_ingredient_blob(s):
|
|
| 84 |
return out
|
| 85 |
|
| 86 |
def _from_list_of_pairs(val):
|
| 87 |
-
# [(measure, name)] or [(name, measure)] → display + tokens
|
| 88 |
out_disp, out_tokens = [], []
|
| 89 |
for x in val:
|
| 90 |
if not isinstance(x, (list, tuple)) or len(x) == 0:
|
|
@@ -112,7 +109,6 @@ def _from_list_of_pairs(val):
|
|
| 112 |
return out_disp, out_tokens
|
| 113 |
|
| 114 |
def _from_list_of_dicts(val):
|
| 115 |
-
# [{"name":"gin","measure":"45 ml"}] → ["45 ml gin"], tokens ["gin"]
|
| 116 |
out_disp, out_tokens = [], []
|
| 117 |
for x in val:
|
| 118 |
if not isinstance(x, dict): continue
|
|
@@ -135,7 +131,6 @@ def _from_list_of_dicts(val):
|
|
| 135 |
return out_disp, out_tokens
|
| 136 |
|
| 137 |
def _ingredients_from_any(val):
|
| 138 |
-
# String blob
|
| 139 |
if isinstance(val, str):
|
| 140 |
lines = _split_ingredient_blob(val)
|
| 141 |
tokens = []
|
|
@@ -147,15 +142,12 @@ def _ingredients_from_any(val):
|
|
| 147 |
idx = i; break
|
| 148 |
tokens.append(" ".join(parts[idx:]).lower())
|
| 149 |
return lines, tokens
|
| 150 |
-
# List of strings
|
| 151 |
if isinstance(val, list) and all(isinstance(x, str) for x in val):
|
| 152 |
disp = [x.strip() for x in val if x and x.strip()]
|
| 153 |
tokens = [x.lower().strip() for x in disp]
|
| 154 |
return disp, tokens
|
| 155 |
-
# List of pairs
|
| 156 |
if isinstance(val, list) and any(isinstance(x, (list, tuple)) for x in val):
|
| 157 |
return _from_list_of_pairs(val)
|
| 158 |
-
# List of dicts
|
| 159 |
if isinstance(val, list) and any(isinstance(x, dict) for x in val):
|
| 160 |
return _from_list_of_dicts(val)
|
| 161 |
return [], []
|
|
@@ -167,7 +159,6 @@ def _get_title(row, cols):
|
|
| 167 |
return "Untitled"
|
| 168 |
|
| 169 |
def _get_ingredients_with_measures(row, cols):
|
| 170 |
-
# Prefer explicit tokenized + measure fields if present
|
| 171 |
if "ingredient_tokens" in cols and row.get("ingredient_tokens"):
|
| 172 |
toks = [str(x).strip().lower() for x in row["ingredient_tokens"] if str(x).strip()]
|
| 173 |
for mkey in ["measure_tokens","measures","measure_list"]:
|
|
@@ -178,8 +169,7 @@ def _get_ingredients_with_measures(row, cols):
|
|
| 178 |
n = str(n).strip()
|
| 179 |
disp.append(_join_measure_name(m, n) if m else n)
|
| 180 |
return disp, toks
|
| 181 |
-
return toks, toks
|
| 182 |
-
# Combined "ingredients" field or variants
|
| 183 |
for key in ["ingredients","ingredients_raw","raw_ingredients","Raw_Ingredients","Raw Ingredients",
|
| 184 |
"ingredient_list","ingredients_list"]:
|
| 185 |
if key in cols and row.get(key) not in (None, "", [], {}):
|
|
@@ -214,12 +204,11 @@ for r in ds:
|
|
| 214 |
ing_disp = [x for x in ing_disp if x]
|
| 215 |
ing_tokens = [x for x in ing_tokens if x]
|
| 216 |
|
| 217 |
-
# Embedding text: title + ingredient TOKENS (names only)
|
| 218 |
fused = f"{title}\nIngredients: {', '.join(ing_tokens)}"
|
| 219 |
DOCS.append({
|
| 220 |
"title": title,
|
| 221 |
-
"ingredients_display": ing_disp,
|
| 222 |
-
"ingredients_tokens": ing_tokens,
|
| 223 |
"text": fused,
|
| 224 |
"base": tag_base(fused),
|
| 225 |
"flavors": tag_flavors(fused),
|
|
@@ -236,31 +225,55 @@ doc_embs = encoder.encode(
|
|
| 236 |
).astype("float32")
|
| 237 |
|
| 238 |
# ========================
|
| 239 |
-
#
|
| 240 |
# ========================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
def _format_ingredients_markdown(lines):
|
| 242 |
if not lines:
|
| 243 |
return "β"
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
| 249 |
if flavor not in FLAVOR_OPTIONS:
|
| 250 |
return "Please choose a flavor."
|
| 251 |
|
| 252 |
-
# hard filter by base (fallback to all)
|
| 253 |
-
idxs = [i for i, d in enumerate(DOCS) if d["base"] ==
|
| 254 |
-
if not idxs:
|
| 255 |
idxs = list(range(len(DOCS)))
|
| 256 |
|
| 257 |
-
|
| 258 |
-
q_text = f"Base spirit: {base_alcohol}. Flavor: {flavor}. Cocktail recipe."
|
| 259 |
q_emb = encoder.encode([q_text], normalize_embeddings=True, convert_to_numpy=True).astype("float32")[0]
|
| 260 |
|
| 261 |
sims = doc_embs[idxs].dot(q_emb) # cosine since normalized
|
| 262 |
|
| 263 |
-
# flavor boost
|
| 264 |
scored = []
|
| 265 |
for pos, i in enumerate(idxs):
|
| 266 |
base_score = float(sims[pos])
|
|
@@ -290,25 +303,25 @@ def recommend(base_alcohol, flavor, top_k=3):
|
|
| 290 |
# UI
|
| 291 |
# ========================
|
| 292 |
with gr.Blocks() as demo:
|
| 293 |
-
gr.Markdown("# πΉ AI Bartender β Base + Flavor (Ingredients
|
| 294 |
|
| 295 |
with gr.Row():
|
| 296 |
-
|
| 297 |
flavor = gr.Dropdown(choices=FLAVOR_OPTIONS, value="citrus", label="Flavor")
|
| 298 |
topk = gr.Slider(1, 10, value=3, step=1, label="Number of recommendations")
|
| 299 |
|
| 300 |
with gr.Row():
|
| 301 |
-
ex1 = gr.Button("Gin + Citrus")
|
| 302 |
-
ex2 = gr.Button("Rum + Fruity")
|
| 303 |
-
ex3 = gr.Button("Mezcal + Smoky")
|
| 304 |
|
| 305 |
out = gr.Markdown()
|
| 306 |
-
gr.Button("Recommend").click(recommend, [
|
| 307 |
|
| 308 |
# Quick-fill examples
|
| 309 |
-
ex1.click(lambda: ("gin", "citrus", 3), outputs=[
|
| 310 |
-
ex2.click(lambda: ("rum", "fruity", 3), outputs=[
|
| 311 |
-
ex3.click(lambda: ("mezcal", "smoky", 3), outputs=[
|
| 312 |
|
| 313 |
if __name__ == "__main__":
|
| 314 |
demo.launch()
|
|
|
|
| 33 |
"vermouth": [r"\bvermouth\b"],
|
| 34 |
"other": [r"\btriple sec\b", r"\bliqueur\b", r"\bcointreau\b", r"\baperol\b", r"\bcampari\b"],
|
| 35 |
}
|
|
|
|
|
|
|
| 36 |
FLAVORS = {
|
| 37 |
"citrus": [r"lime", r"lemon", r"grapefruit", r"orange", r"\bcitrus\b"],
|
| 38 |
"sweet": [r"simple syrup", r"\bsugar\b", r"\bhoney\b", r"\bagave\b", r"\bmaple\b", r"\bgrenadine\b", r"\bvanilla\b", r"\bsweet\b"],
|
|
|
|
| 82 |
return out
|
| 83 |
|
| 84 |
def _from_list_of_pairs(val):
|
|
|
|
| 85 |
out_disp, out_tokens = [], []
|
| 86 |
for x in val:
|
| 87 |
if not isinstance(x, (list, tuple)) or len(x) == 0:
|
|
|
|
| 109 |
return out_disp, out_tokens
|
| 110 |
|
| 111 |
def _from_list_of_dicts(val):
|
|
|
|
| 112 |
out_disp, out_tokens = [], []
|
| 113 |
for x in val:
|
| 114 |
if not isinstance(x, dict): continue
|
|
|
|
| 131 |
return out_disp, out_tokens
|
| 132 |
|
| 133 |
def _ingredients_from_any(val):
|
|
|
|
| 134 |
if isinstance(val, str):
|
| 135 |
lines = _split_ingredient_blob(val)
|
| 136 |
tokens = []
|
|
|
|
| 142 |
idx = i; break
|
| 143 |
tokens.append(" ".join(parts[idx:]).lower())
|
| 144 |
return lines, tokens
|
|
|
|
| 145 |
if isinstance(val, list) and all(isinstance(x, str) for x in val):
|
| 146 |
disp = [x.strip() for x in val if x and x.strip()]
|
| 147 |
tokens = [x.lower().strip() for x in disp]
|
| 148 |
return disp, tokens
|
|
|
|
| 149 |
if isinstance(val, list) and any(isinstance(x, (list, tuple)) for x in val):
|
| 150 |
return _from_list_of_pairs(val)
|
|
|
|
| 151 |
if isinstance(val, list) and any(isinstance(x, dict) for x in val):
|
| 152 |
return _from_list_of_dicts(val)
|
| 153 |
return [], []
|
|
|
|
| 159 |
return "Untitled"
|
| 160 |
|
| 161 |
def _get_ingredients_with_measures(row, cols):
|
|
|
|
| 162 |
if "ingredient_tokens" in cols and row.get("ingredient_tokens"):
|
| 163 |
toks = [str(x).strip().lower() for x in row["ingredient_tokens"] if str(x).strip()]
|
| 164 |
for mkey in ["measure_tokens","measures","measure_list"]:
|
|
|
|
| 169 |
n = str(n).strip()
|
| 170 |
disp.append(_join_measure_name(m, n) if m else n)
|
| 171 |
return disp, toks
|
| 172 |
+
return toks, toks
|
|
|
|
| 173 |
for key in ["ingredients","ingredients_raw","raw_ingredients","Raw_Ingredients","Raw Ingredients",
|
| 174 |
"ingredient_list","ingredients_list"]:
|
| 175 |
if key in cols and row.get(key) not in (None, "", [], {}):
|
|
|
|
| 204 |
ing_disp = [x for x in ing_disp if x]
|
| 205 |
ing_tokens = [x for x in ing_tokens if x]
|
| 206 |
|
|
|
|
| 207 |
fused = f"{title}\nIngredients: {', '.join(ing_tokens)}"
|
| 208 |
DOCS.append({
|
| 209 |
"title": title,
|
| 210 |
+
"ingredients_display": ing_disp,
|
| 211 |
+
"ingredients_tokens": ing_tokens,
|
| 212 |
"text": fused,
|
| 213 |
"base": tag_base(fused),
|
| 214 |
"flavors": tag_flavors(fused),
|
|
|
|
| 225 |
).astype("float32")
|
| 226 |
|
| 227 |
# ========================
|
| 228 |
+
# Pretty ingredient formatting
|
| 229 |
# ========================
|
| 230 |
+
_MEASURE_RE = re.compile(
|
| 231 |
+
r"^\s*(?P<meas>(?:\d+(\.\d+)?|\d+\s*/\s*\d+|\d+\s*\d*/\d+)\s*(?:ml|oz|tsp|tbsp)?|\d+\s*(?:ml|oz|tsp|tbsp)|(?:dash|dashes|drop|drops|barspoon)s?)\b[\s\-β:]*",
|
| 232 |
+
flags=re.I
|
| 233 |
+
)
|
| 234 |
+
|
| 235 |
+
def _split_measure_name_line(line: str):
    """Split one ingredient display line into ``(measure, name)``.

    Returns:
        ``(None, line)`` unchanged when ``line`` is not a string;
        ``("", stripped_line)`` when no leading measure is recognised;
        otherwise ``(measure, name)``, where the measure is the matched text
        passed through ``_norm_measure`` (defined elsewhere in this file) and
        the name is whatever follows the measure and its separator.
    """
    if not isinstance(line, str):
        return None, line

    text = line.strip()
    m = _MEASURE_RE.match(text)
    if m is None:
        return "", text

    # Slice the same string the regex ran on. m.end() is an offset into the
    # stripped text, so applying it to the raw line would cut at the wrong
    # place whenever the line has leading whitespace.
    meas = _norm_measure(m.group("meas"))
    name = text[m.end():].strip()
    return meas, name
|
| 243 |
+
|
| 244 |
def _format_ingredients_markdown(lines):
|
| 245 |
if not lines:
|
| 246 |
return "β"
|
| 247 |
+
formatted = []
|
| 248 |
+
for ln in lines:
|
| 249 |
+
meas, name = _split_measure_name_line(ln)
|
| 250 |
+
if meas and name:
|
| 251 |
+
formatted.append(f"- **{meas}** β {name}")
|
| 252 |
+
elif name:
|
| 253 |
+
formatted.append(f"- {name}")
|
| 254 |
+
else:
|
| 255 |
+
formatted.append(f"- {ln}")
|
| 256 |
+
return "\n".join(formatted)
|
| 257 |
|
| 258 |
+
# ========================
|
| 259 |
+
# Recommendation
|
| 260 |
+
# ========================
|
| 261 |
+
def recommend(base_alcohol_text, flavor, top_k=3):
|
| 262 |
+
# Map user-typed base to our internal base tags; fallback to 'other'
|
| 263 |
+
inferred_base = tag_base(base_alcohol_text or "")
|
| 264 |
if flavor not in FLAVOR_OPTIONS:
|
| 265 |
return "Please choose a flavor."
|
| 266 |
|
| 267 |
+
# hard filter by inferred base (fallback to all if 'other' or no matches)
|
| 268 |
+
idxs = [i for i, d in enumerate(DOCS) if d["base"] == inferred_base]
|
| 269 |
+
if inferred_base == "other" or not idxs:
|
| 270 |
idxs = list(range(len(DOCS)))
|
| 271 |
|
| 272 |
+
q_text = f"Base spirit: {base_alcohol_text}. Flavor: {flavor}. Cocktail recipe."
|
|
|
|
| 273 |
q_emb = encoder.encode([q_text], normalize_embeddings=True, convert_to_numpy=True).astype("float32")[0]
|
| 274 |
|
| 275 |
sims = doc_embs[idxs].dot(q_emb) # cosine since normalized
|
| 276 |
|
|
|
|
| 277 |
scored = []
|
| 278 |
for pos, i in enumerate(idxs):
|
| 279 |
base_score = float(sims[pos])
|
|
|
|
| 303 |
# UI
|
| 304 |
# ========================
|
| 305 |
# Gradio front end: a free-text base spirit, a flavor dropdown and a result
# count feed recommend(); its return value is rendered as Markdown in `out`.
with gr.Blocks() as demo:
    # The heading's emoji and dash were mis-decoded; restored here.
    gr.Markdown("# 🍹 AI Bartender — Type a Base + Flavor (Clear Ingredients)")

    with gr.Row():
        base_text = gr.Textbox(value="gin", label="Base alcohol (type any spirit, e.g., 'gin', 'white rum', 'bourbon')")
        flavor = gr.Dropdown(choices=FLAVOR_OPTIONS, value="citrus", label="Flavor")
        topk = gr.Slider(1, 10, value=3, step=1, label="Number of recommendations")

    with gr.Row():
        ex1 = gr.Button("Example: Gin + Citrus")
        ex2 = gr.Button("Example: Rum + Fruity")
        ex3 = gr.Button("Example: Mezcal + Smoky")

    out = gr.Markdown()
    gr.Button("Recommend").click(recommend, [base_text, flavor, topk], out)

    # Quick-fill examples: each button only fills the three inputs; the user
    # still presses "Recommend" to run the query.
    ex1.click(lambda: ("gin", "citrus", 3), outputs=[base_text, flavor, topk])
    ex2.click(lambda: ("white rum", "fruity", 3), outputs=[base_text, flavor, topk])
    ex3.click(lambda: ("mezcal", "smoky", 3), outputs=[base_text, flavor, topk])

if __name__ == "__main__":
    demo.launch()
|