Spaces:

Francesco
/

FairytaleDJ

Runtime error

App Files Files Community

Francesco commited on May 15, 2023

Commit

4d2ab8e

1 Parent(s): d78781e

new version with weighted probs when selecting song

Browse files

Files changed (7) hide show

app.py +118 -31
embeddings.npy +0 -0
names.py +0 -1
playground.py +0 -60
prompts/bot.prompt +8 -5
requirements.txt +0 -2
temp.ipynb +0 -381

app.py CHANGED Viewed

@@ -6,14 +6,27 @@ from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
 load_dotenv()
-import os
 import json
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings.openai import OpenAIEmbeddings
 from data import load_db
 from names import DATASET_ID, MODEL_ID
-import random
 @st.cache_resource
@@ -30,51 +43,125 @@ def init():
     )
     prompt = PromptTemplate(
-    input_variables=["songs", "user_input"],
-    template=Path("prompts/bot_with_summary.prompt").read_text(),
-)
-    llm = ChatOpenAI(temperature=0.7)
     chain = LLMChain(llm=llm, prompt=prompt)
-    with open("data/emotions_with_spotify_url.json", "r") as f:
-        data = json.load(f)
-    movies_and_names_to_songs = {}
-    songs_str = ""
-    for movie, songs in data.items():
-        for song in songs:
-            movie_and_name = f"{movie};{song['name']}".lower()
-            songs_str += f"{movie_and_name}:{song['text']}\n"
-            movies_and_names_to_songs[movie_and_name] = song
-    return db, chain, movies_and_names_to_songs, songs_str
-db, chain, movies_and_names_to_songs, songs_str = init()
-st.title("Disney song for you")
 text_input = st.text_input(
     label="How are you feeling today?",
     placeholder="I am ready to rock and rool!",
 )
-clicked = st.button("Click me")
 placeholder_emotions = st.empty()
 placeholder = st.empty()
-def get_emotions(songs_str, user_input):
-    res = chain.run(songs=songs_str, user_input=user_input)
-    song_key = random.choice(eval(res))
-    doc = movies_and_names_to_songs[song_key.lower()]
-    print(f"Reply: {res}, chosen: {song_key}")
-    with placeholder:
-        embed_url = doc["embed_url"]
-        iframe_html = f'<iframe src="{embed_url}" style="border:0"> </iframe>'
-        st.components.v1.html(f"<div style='display:flex;flex-direction:column'>{iframe_html}</div>")
-if clicked:
-    get_emotions(songs_str, text_input)

 from langchain.prompts import PromptTemplate
 load_dotenv()
 import json
+import os
+import random
+from enum import Enum
+from typing import List, Tuple
+import numpy as np
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.schema import Document
 from data import load_db
 from names import DATASET_ID, MODEL_ID
+class RetrievalType:
+    # String identifiers for song retrieval strategies.
+    # NOTE(review): not referenced anywhere in the visible part of this diff and
+    # it does not subclass the imported `Enum` - confirm whether it is still used.
+    # presumably: take only the best-scoring match - TODO confirm
+    FIRST_MATCH = "first-match"
+    # presumably: sample from the pool of retrieved matches - TODO confirm
+    POOL_MATCHES = "pool-matches"
+Matches = List[Tuple[Document, float]]
 @st.cache_resource
     )
     prompt = PromptTemplate(
+        input_variables=["user_input"],
+        template=Path("prompts/bot.prompt").read_text(),
+    )
+    llm = ChatOpenAI(temperature=0.3)
     chain = LLMChain(llm=llm, prompt=prompt)
+    return db, chain
+# Don't show the setting sidebar
+if "sidebar_state" not in st.session_state:
+    st.session_state.sidebar_state = "collapsed"
+st.set_page_config(initial_sidebar_state=st.session_state.sidebar_state)
+db, chain = init()
+st.title("Disney songs for you 🎵🏰")
+st.markdown(
+    """
+*<small>Made with [DeepLake](https://www.deeplake.ai/) 🚀 and [LangChain](https://python.langchain.com/en/latest/index.html) 🦜⛓️</small>*
+💫 Unleash the magic within you with our enchanting app, turning your sentiments into a Disney soundtrack! 🌈 Just express your emotions, and embark on a whimsical journey as we tailor a Disney melody to match your mood. 👑💖""",
+    unsafe_allow_html=True,
+)
+how_it_works = st.expander(label="How it works")
 text_input = st.text_input(
     label="How are you feeling today?",
     placeholder="I am ready to rock and rool!",
 )
+run_btn = st.button("Make me sing! 🎶")
+with how_it_works:
+    st.markdown(
+        """
+The application follows a sequence of steps to deliver Disney songs matching the user's emotions:
+- **User Input**: The application starts by collecting user's emotional state through a text input.
+- **Emotion Encoding**: The user-provided emotions are then fed to a Language Model (LLM). The LLM interprets and encodes these emotions.
+- **Similarity Search**: These encoded emotions are utilized to perform a similarity search within our [vector database](https://www.deeplake.ai/). This database houses Disney songs, each represented as emotional embeddings.
+- **Song Selection**: From the pool of top matching songs, the application randomly selects one. The selection is weighted, giving preference to songs with higher similarity scores.
+- **Song Retrieval**: The selected song's embedded player is displayed on the webpage for the user. Additionally, the LLM interpreted emotional state associated with the chosen song is displayed.
+"""
+    )
 placeholder_emotions = st.empty()
 placeholder = st.empty()
+with st.sidebar:
+    st.text("App settings")
+    filter_threshold = st.slider(
+        "Threadhol used to filter out low scoring songs",
+        min_value=0.0,
+        max_value=1.0,
+        value=0.8,
+    )
+    max_number_of_songs = st.slider(
+        "Max number of songs we will retrieve from the db",
+        min_value=5,
+        max_value=50,
+        value=20,
+        step=1,
+    )
+    number_of_displayed_songs = st.slider(
+        "Number of displayed songs", min_value=1, max_value=4, value=1, step=1
+    )
+def filter_scores(matches: Matches, th: float = 0.8) -> Matches:
+    return [(doc, score) for (doc, score) in matches if score > th]
+def normalize_scores_by_sum(matches: Matches) -> Matches:
+    scores = [score for _, score in matches]
+    tot = sum(scores)
+    return [(doc, (score / tot)) for doc, score in matches]
+def get_song(user_input: str, k: int = 20):
+    emotions = chain.run(user_input=user_input)
+    matches = db.similarity_search_with_score(emotions, distance_metric="cos", k=k)
+    # [print(doc.metadata['name'], score) for doc, score in matches]
+    docs, scores = zip(
+        *normalize_scores_by_sum(filter_scores(matches, filter_threshold))
+    )
+    choosen_docs = np.random.choice(docs, size=number_of_displayed_songs, p=scores)
+    return choosen_docs, emotions
+def set_song(user_input):
+    if user_input == "":
+        return
+    # take first 120 chars
+    user_input = user_input[:120]
+    docs, emotions = get_song(user_input, k=max_number_of_songs)
+    with placeholder_emotions:
+        st.markdown("Your emotions: `" + emotions + "`")
+    with placeholder:
+        iframes_html = ""
+        for doc in docs:
+            print(doc.metadata["name"])
+            embed_url = doc.metadata["embed_url"]
+            iframes_html += (
+                f'<iframe src="{embed_url}" style="border:0;height:100px"> </iframe>'
+            )
+        st.markdown(
+            f"<div style='display:flex;flex-direction:column'>{iframes_html}</div>",
+            unsafe_allow_html=True,
+        )
+        # st.components.v1.html(
+        #     f"<div>{iframes_html}</div>"
+        # )
+if run_btn:
+    set_song(text_input)

embeddings.npy DELETED Viewed

Binary file (24.7 kB)

names.py CHANGED Viewed

@@ -1,4 +1,3 @@
 MODEL_ID = "text-embedding-ada-002"
 # DATASET_ID = "disney-lyrics"
 DATASET_ID = "disney-lyrics-emotions"

 MODEL_ID = "text-embedding-ada-002"
 # DATASET_ID = "disney-lyrics"
 DATASET_ID = "disney-lyrics-emotions"

playground.py DELETED Viewed

@@ -1,60 +0,0 @@
-from dotenv import load_dotenv
-load_dotenv()
-import json
-import os
-from pathlib import Path
-import deeplake
-import numpy as np
-import openai
-# https://www.disneyclips.com/lyrics/
-DATASET_NAME = "disney-lyrics"
-model_id = "text-embedding-ada-002"
-dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_NAME}"
-print(dataset_path)
-runtime = {"db_engine": True}
-with open("lyrics.json", "rb") as f:
-    lyrics = json.load(f)["lyrics"]
-# embeddings = [el["embedding"] for el in openai.Embedding.create(input=lyrics, model=model_id)['data']]
-# embeddings_np = np.array(embeddings)
-# np.save("embeddings.npy", embeddings_np)
-embeddings_np = np.load("embeddings.npy")
-print(embeddings_np.shape)
-# ds = deeplake.empty(dataset_path, runtime=runtime, overwrite=True)
-# # https://docs.deeplake.ai/en/latest/Htypes.html
-# with ds:
-#     ds.create_tensor("embedding", htype="embedding", dtype=np.float32, exist_ok=True)
-#     ds.extend({ "embedding": embeddings_np.astype(np.float32)})
-#     ds.summary()
-search_term = "Let's get down to business"
-embedding = openai.Embedding.create(input=search_term, model="text-embedding-ada-002")[
-    "data"
-][0]["embedding"]
-# Format the embedding as a string, so it can be passed in the REST API request.
-embedding_search = ",".join([str(item) for item in embedding])
-# embedding_search = ",".join([str(item) for item in embeddings_np[0].tolist()])
-# print(embedding_search)
-ds = deeplake.load(dataset_path)
-# print(embedding_search)
-query = f'select * from (select l2_norm(embedding - ARRAY[{embedding_search}]) as score from "{dataset_path}") order by score desc limit 5'
-with open("foo.txt", "w") as f:
-    f.write(query)
-query_res = ds.query(query)
-print(query_res)

prompts/bot.prompt CHANGED Viewed

@@ -1,9 +1,12 @@
-We have a simple song retrieval system. It accepts a max of 4 emotions. You are tasked to suggest emotions to match the users feelings. Let me show you a couple of examples
 Input: "I had a great day!"
-Output: "Joy and Energy"
 Input: "I am very tired today and I am not feeling weel"
-Output: "Exhaustion, Discomfort, and Fatigue"
-If the sentence is too short, you can also suggest just one or two emotions.
-Please, suggest emotions for input = "{content}", reply ONLY with a max of 4 emotions.

+We have a simple song retrieval system. It accepts 8 emotions. You are tasked to suggest between 1 and 4 emotions to match the user's feelings. Suggest more emotions for longer sentences and just one or two for short ones, trying to condense the main theme of the input.
+Examples:
 Input: "I had a great day!"
+"Joy"
 Input: "I am very tired today and I am not feeling weel"
+"Exhaustion, Discomfort, and Fatigue"
+Input: "I am in Love"
+"Love"
+Please, suggest emotions for input = "{user_input}", reply ONLY with a list of emotions/feelings/vibes

requirements.txt CHANGED Viewed

@@ -1,6 +1,4 @@
 openai
-torch==2.0.1
-torchvision
 python-dotenv
 deeplake
 langchain

 openai
 python-dotenv
 deeplake
 langchain

temp.ipynb DELETED Viewed

@@ -1,381 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "26b62e0c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%load_ext autoreload\n",
-    "%autoreload "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "b1a6a020",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/zuppif/miniconda3/envs/activeloop/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.4.3) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
-      "  warnings.warn(\n",
-      "-"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/zuppif/disney-lyrics-emotions\n",
-      "\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\\"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "hub://zuppif/disney-lyrics-emotions loaded successfully.\n",
-      "\n",
-      "Deep Lake Dataset in hub://zuppif/disney-lyrics-emotions already exists, loading from the storage\n",
-      "Dataset(path='hub://zuppif/disney-lyrics-emotions', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
-      "\n",
-      "  tensor     htype     shape      dtype  compression\n",
-      "  -------   -------   -------    -------  ------- \n",
-      " embedding  generic  (85, 1536)  float32   None   \n",
-      "    ids      text     (85, 1)      str     None   \n",
-      " metadata    json     (85, 1)      str     None   \n",
-      "   text      text     (85, 1)      str     None   \n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      " \r",
-      "\r",
-      " \r"
-     ]
-    }
-   ],
-   "source": [
-    "from dotenv import load_dotenv\n",
-    "load_dotenv() \n",
-    "from names import DATASET_ID, MODEL_ID\n",
-    "from data import load_db\n",
-    "import os\n",
-    "from langchain.chains import RetrievalQA, ConversationalRetrievalChain\n",
-    "from langchain.vectorstores import DeepLake\n",
-    "from langchain.llms import OpenAI\n",
-    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
-    "from langchain.chat_models import ChatOpenAI\n",
-    "\n",
-    "embeddings = OpenAIEmbeddings(model=MODEL_ID)\n",
-    "dataset_path = f\"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}\"\n",
-    "\n",
-    "db = load_db(dataset_path, embedding_function=embeddings, token=os.environ['ACTIVELOOP_TOKEN'], org_id=os.environ[\"ACTIVELOOP_ORG_ID\"], read_only=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "97c3370c",
-   "metadata": {},
-   "source": [
-    "## Using similarity search"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 75,
-   "id": "07d8a381",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.chains import LLMChain\n",
-    "from langchain.prompts import PromptTemplate\n",
-    "from pathlib import Path\n",
-    "\n",
-    "prompt = PromptTemplate(\n",
-    "    input_variables=[\"content\"],\n",
-    "    template=Path(\"prompts/bot.prompt\").read_text(),\n",
-    ")\n",
-    "\n",
-    "llm = ChatOpenAI(temperature=0.7)\n",
-    "\n",
-    "chain = LLMChain(llm=llm, prompt=prompt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "id": "ebca722d",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Exhaustion, Fatigue, Sleepiness, Drained.'"
-      ]
-     },
-     "execution_count": 76,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "emotions = chain.run(content=\"Damn I am feeling so tired\")\n",
-    "emotions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 77,
-   "id": "9598a36c",
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[(Document(page_content='Hopeful, determined, inspired, optimistic, longing, driven, passionate, adventurous.', metadata={'movie': 'Hercules', 'name': 'Go the Distance', 'embed_url': 'https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator'}), 0.8135085701942444), (Document(page_content='upset, mad, regret, sad, fine, longing, hopeful, impatient', metadata={'movie': 'Encanto', 'name': 'Waiting on a Miracle', 'embed_url': 'https://open.spotify.com/embed/track/3oRW9ZGPRbLRMneQ5lwflt?utm_source=generator'}), 0.8108540177345276), (Document(page_content='nasty, repentant, magic, sad, lonely, bored, withdrawn, busy', metadata={'movie': 'The Little Mermaid', 'name': 'Poor Unfortunate Souls', 'embed_url': 'https://open.spotify.com/embed/track/7zsw78LtXUD7JfEwH64HK2?utm_source=generator'}), 0.8080281615257263), (Document(page_content='hopeful, optimistic, dreamy, inspired, happy, content, fulfilled, grateful', metadata={'movie': 'Pinocchio', 'name': 'When You Wish Upon a Star', 'embed_url': 'https://open.spotify.com/embed/track/1WrPa4lrIddctGWAIYYfP9?utm_source=generator'}), 0.8055723309516907)]\n",
-      "https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator\n",
-      "page_content='Hopeful, determined, inspired, optimistic, longing, driven, passionate, adventurous.' metadata={'movie': 'Hercules', 'name': 'Go the Distance', 'embed_url': 'https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator'}\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"700\"\n",
-       "            height=\"350\"\n",
-       "            src=\"https://open.spotify.com/embed/track/0D1OY0M5A0qD5HGBvFmFid?utm_source=generator\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "            \n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f1890ed7430>"
-      ]
-     },
-     "execution_count": 77,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "matches = db.similarity_search_with_score(emotions, distance_metric=\"cos\")\n",
-    "print(matches)\n",
-    "doc, score = matches[0]\n",
-    "print(doc.metadata[\"embed_url\"])\n",
-    "print(doc)\n",
-    "\n",
-    "from IPython.display import IFrame\n",
-    "IFrame(doc.metadata[\"embed_url\"], width=700, height=350)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "8a474a1c",
-   "metadata": {},
-   "source": [
-    "## Using all the songs emotions in the prommpt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "id": "c3cb2f3d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "from langchain.chains import LLMChain\n",
-    "from langchain.prompts import PromptTemplate\n",
-    "from pathlib import Path\n",
-    "\n",
-    "prompt = PromptTemplate(\n",
-    "    input_variables=[\"songs\", \"user_input\"],\n",
-    "    template=Path(\"prompts/bot_with_summary.prompt\").read_text(),\n",
-    ")\n",
-    "\n",
-    "llm = ChatOpenAI(temperature=0.7)\n",
-    "\n",
-    "chain = LLMChain(llm=llm, prompt=prompt)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b1ca9c9c",
-   "metadata": {},
-   "source": [
-    "Let's create the songs string"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "00416443",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open(\"data/emotions_with_spotify_url.json\", \"r\") as f:\n",
-    "    data = json.load(f)\n",
-    "    \n",
-    "movies_and_names_to_songs = {}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "id": "e4bf60d4",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "songs_str = \"\"\n",
-    "\n",
-    "for movie, songs in data.items():\n",
-    "    for song in songs:\n",
-    "        movie_and_name = f\"{movie};{song['name']}\".lower()\n",
-    "        songs_str += f\"{movie_and_name}:{song['text']}\\n\"\n",
-    "        movies_and_names_to_songs[movie_and_name] = song"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "id": "32cd1a47",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# prompt.format(songs=songs_str, user_input=\"I am feeling great today\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "id": "a056e5e9",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'[\"coco;remember me (dúo)\", \"mulan;reflection\", \"frozen;do you want to build a snowman?\"]'"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "res = chain.run(songs=songs_str, user_input=\"I am sad\")\n",
-    "res"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "e84eeeaa",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import random\n",
-    "\n",
-    "res = random.choice(eval(res))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "e24ed65f",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "frozen;do you want to build a snowman?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "        <iframe\n",
-       "            width=\"700\"\n",
-       "            height=\"350\"\n",
-       "            src=\"https://open.spotify.com/embed/track/2yi7HZrBOC4bMUSTcs4VK6?utm_source=generator\"\n",
-       "            frameborder=\"0\"\n",
-       "            allowfullscreen\n",
-       "            \n",
-       "        ></iframe>\n",
-       "        "
-      ],
-      "text/plain": [
-       "<IPython.lib.display.IFrame at 0x7f54178b9d00>"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "print(res)\n",
-    "doc = movies_and_names_to_songs[res]\n",
-    "\n",
-    "from IPython.display import IFrame\n",
-    "IFrame(doc[\"embed_url\"], width=700, height=350)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "03de1b93",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.16"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}