Spaces: Running
maxiaolong03 committed · Commit 1b0a1e0 · 1 Parent(s): e6723e9
add files
Browse files:
- app.py +109 -24
- bot_requests.py +88 -77
app.py
CHANGED
@@ -99,10 +99,21 @@ def get_args() -> argparse.Namespace:
     """
     parser = ArgumentParser(description="ERNIE models web chat demo.")
 
-    parser.add_argument(
-
-
-    parser.add_argument(
+    parser.add_argument(
+        "--server-port", type=int, default=7860, help="Demo server port."
+    )
+    parser.add_argument(
+        "--server-name", type=str, default="0.0.0.0", help="Demo server name."
+    )
+    parser.add_argument(
+        "--max_char",
+        type=int,
+        default=20000,
+        help="Maximum character limit for messages.",
+    )
+    parser.add_argument(
+        "--max_retry_num", type=int, default=3, help="Maximum retry number for request."
+    )
     parser.add_argument(
         "--model_map",
         type=str,
@@ -124,9 +135,23 @@ def get_args() -> argparse.Namespace:
         default="https://qianfan.baidubce.com/v2/ai_search/chat/completions",
         help="Web Search Service URL.",
     )
-    parser.add_argument("--qianfan_api_key", type=str, default=os.environ.get("API_SEARCH_KEY"), help="QianFan API Key.")
     parser.add_argument(
-        "--
+        "--qianfan_api_key",
+        type=str,
+        default=os.environ.get("API_SEARCH_KEY"),
+        help="QianFan API Key.",
+    )
+    parser.add_argument(
+        "--max_crawler_threads",
+        type=int,
+        default=10,
+        help="The maximum number of concurrent crawler threads.",
+    )
+    parser.add_argument(
+        "--concurrency_limit", type=int, default=10, help="Default concurrency limit."
+    )
+    parser.add_argument(
+        "--max_queue_size", type=int, default=50, help="Maximum queue size for request."
     )
 
     args = parser.parse_args()
@@ -170,7 +195,9 @@ class GradioEvents:
         return conversation, conversation_str
 
     @staticmethod
-    def get_search_query(
+    def get_search_query(
+        conversation: list, model_name: str, bot_client: BotClient
+    ) -> dict:
         """
         Determines if a web search is needed by analyzing conversation context.
         Processes model response to extract structured search decision and queries.
@@ -224,21 +251,29 @@ class GradioEvents:
         Yields:
             dict: A dictionary containing the event type and its corresponding content.
         """
-        conversation, conversation_str = GradioEvents.get_history_conversation(
+        conversation, conversation_str = GradioEvents.get_history_conversation(
+            task_history
+        )
 
         # Step 1: Determine whether a search is needed and obtain the corresponding query list
        search_info_res = {}
        if search_state:
            search_info_message = SEARCH_INFO_PROMPT.format(
-                date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+                context=conversation_str,
+                query=query,
            )
            search_conversation = [{"role": "user", "content": search_info_message}]
-            search_info_res = GradioEvents.get_search_query(
+            search_info_res = GradioEvents.get_search_query(
+                search_conversation, model_name, bot_client
+            )
            if search_info_res is None:
                search_info_res = {"is_search": True, "query_list": [query]}
 
        # Step 2: If a search is needed, obtain the corresponding query results
-        if search_info_res.get("is_search", False) and search_info_res.get(
+        if search_info_res.get("is_search", False) and search_info_res.get(
+            "query_list", []
+        ):
            yield {"type": "search_result", "content": "🧐 努力搜索中... ✨"}
            search_result = bot_client.get_web_search_res(search_info_res["query_list"])
 
@@ -367,7 +402,13 @@ class GradioEvents:
            chatbot.pop(-1)
 
        async for chunk, search_result in GradioEvents.predict(
-            item[0],
+            item[0],
+            chatbot,
+            task_history,
+            model,
+            search_state,
+            max_crawler_threads,
+            bot_client,
        ):
            yield chunk, search_result
 
@@ -434,7 +475,10 @@ class GradioEvents:
 
    @staticmethod
    async def get_complete_search_content(
-        search_results: list,
+        search_results: list,
+        max_crawler_threads: int,
+        bot_client: BotClient,
+        max_search_results_char: int = 18000,
    ) -> str:
        """
        Combines and formats multiple search results into a single string.
@@ -528,14 +572,22 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
            <a href="https://yiyan.baidu.com/blog/publication/">Technical Report</a></center>"""
        )
 
-        chatbot = gr.Chatbot(
+        chatbot = gr.Chatbot(
+            label="ERNIE", elem_classes="control-height", type="messages"
+        )
 
-        search_result = gr.Textbox(
+        search_result = gr.Textbox(
+            label="Search Result", lines=10, max_lines=10, visible=True
+        )
 
-        search_check = gr.Checkbox(
+        search_check = gr.Checkbox(
+            label="🌐 Search the web(联网搜索)", value=True, interactive=True
+        )
 
        with gr.Row():
-            query = gr.Textbox(
+            query = gr.Textbox(
+                label="Input", lines=1, scale=6, elem_classes="input-textbox"
+            )
 
        with gr.Row():
            empty_btn = gr.Button("🧹 Clear History(清除历史)")
@@ -546,33 +598,66 @@ def launch_demo(args: argparse.Namespace, bot_client: BotClient):
        model_name = gr.State(next(iter(args.model_map.keys())))
        max_crawler_threads = gr.State(args.max_crawler_threads)
 
-        search_check.change(
+        search_check.change(
+            fn=GradioEvents.search_toggle_state,
+            inputs=search_check,
+            outputs=search_result,
+        )
 
        predict_with_clients = partial(GradioEvents.predict, bot_client=bot_client)
-        regenerate_with_clients = partial(
+        regenerate_with_clients = partial(
+            GradioEvents.regenerate, bot_client=bot_client
+        )
        query.submit(
            predict_with_clients,
-            inputs=[
+            inputs=[
+                query,
+                chatbot,
+                task_history,
+                model_name,
+                search_check,
+                max_crawler_threads,
+            ],
            outputs=[chatbot, search_result],
            show_progress=True,
        )
        query.submit(GradioEvents.reset_user_input, [], [query])
        submit_btn.click(
            predict_with_clients,
-            inputs=[
+            inputs=[
+                query,
+                chatbot,
+                task_history,
+                model_name,
+                search_check,
+                max_crawler_threads,
+            ],
            outputs=[chatbot, search_result],
            show_progress=True,
        )
        submit_btn.click(GradioEvents.reset_user_input, [], [query])
-        empty_btn.click(
+        empty_btn.click(
+            GradioEvents.reset_state,
+            outputs=[chatbot, task_history, search_result],
+            show_progress=True,
+        )
        regen_btn.click(
            regenerate_with_clients,
-            inputs=[
+            inputs=[
+                chatbot,
+                task_history,
+                model_name,
+                search_check,
+                max_crawler_threads,
+            ],
            outputs=[chatbot, search_result],
            show_progress=True,
        )
 
-    demo.queue(
+    demo.queue(
+        default_concurrency_limit=args.concurrency_limit, max_size=args.max_queue_size
+    )
+    demo.launch(server_port=args.server_port, server_name=args.server_name)
 
 
 def main():
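A note on the argument block above: the new flags mix dash style (--server-port) with underscore style (--max_char), but argparse normalizes both to underscore attribute names, so the later args.server_port / args.max_queue_size accesses work either way. A minimal standalone sketch of that behavior (a subset of the diff's get_args):

    from argparse import ArgumentParser

    parser = ArgumentParser(description="ERNIE models web chat demo.")
    parser.add_argument("--server-port", type=int, default=7860, help="Demo server port.")
    parser.add_argument("--server-name", type=str, default="0.0.0.0", help="Demo server name.")
    parser.add_argument("--concurrency_limit", type=int, default=10, help="Default concurrency limit.")
    parser.add_argument("--max_queue_size", type=int, default=50, help="Maximum queue size for request.")

    # Dashes in option names become underscores in the parsed namespace.
    args = parser.parse_args(["--server-port", "8080", "--max_queue_size", "100"])
    print(args.server_port, args.server_name, args.concurrency_limit, args.max_queue_size)
    # -> 8080 0.0.0.0 10 100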
bot_requests.py
CHANGED
@@ -16,20 +16,22 @@
 
 import os
 import argparse
+import json
 import logging
 import traceback
-
+
 import jieba
+import requests
 from openai import OpenAI
 
-import requests
 
-class BotClient
+class BotClient:
     """Client for interacting with various AI models."""
+
     def __init__(self, args: argparse.Namespace):
         """
-        Initializes the BotClient instance by configuring essential parameters from command line arguments
-        including retry limits, character constraints, model endpoints and API credentials while setting up
+        Initializes the BotClient instance by configuring essential parameters from command line arguments
+        including retry limits, character constraints, model endpoints and API credentials while setting up
         default values for missing arguments to ensure robust operation.
 
         Args:
@@ -37,25 +39,29 @@ class BotClient(object):
            Uses getattr() to safely retrieve values with fallback defaults.
        """
        self.logger = logging.getLogger(__name__)
-
-        self.max_retry_num = getattr(args, 'max_retry_num', 3)
-        self.max_char = getattr(args, 'max_char', 8000)
 
-        self.
+        self.max_retry_num = getattr(args, "max_retry_num", 3)
+        self.max_char = getattr(args, "max_char", 8000)
+
+        self.model_map = getattr(args, "model_map", {})
        self.api_key = os.environ.get("API_KEY")
 
-        self.embedding_service_url = getattr(
-
+        self.embedding_service_url = getattr(
+            args, "embedding_service_url", "embedding_service_url"
+        )
+        self.embedding_model = getattr(args, "embedding_model", "embedding_model")
 
-        self.web_search_service_url = getattr(
-
+        self.web_search_service_url = getattr(
+            args, "web_search_service_url", "web_search_service_url"
+        )
+        self.max_search_results_num = getattr(args, "max_search_results_num", 15)
 
        self.qianfan_api_key = os.environ.get("API_SEARCH_KEY")
 
    def call_back(self, host_url: str, req_data: dict) -> dict:
        """
-        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
-        conversion to a compatible dictionary format, and manages any exceptions that may occur during
+        Executes an HTTP request to the specified endpoint using the OpenAI client, handles the response
+        conversion to a compatible dictionary format, and manages any exceptions that may occur during
        the request process while logging errors appropriately.
 
        Args:
@@ -68,20 +74,18 @@ class BotClient(object):
        """
        try:
            client = OpenAI(base_url=host_url, api_key=self.api_key)
-            response = client.chat.completions.create(
-
-            )
-
+            response = client.chat.completions.create(**req_data)
+
            # Convert OpenAI response to compatible format
            return response.model_dump()
 
        except Exception as e:
-            self.logger.error("Stream request failed: {}"
+            self.logger.error(f"Stream request failed: {e}")
            raise
 
    def call_back_stream(self, host_url: str, req_data: dict) -> dict:
        """
-        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
+        Makes a streaming HTTP request to the specified host URL using the OpenAI client and yields response chunks
        in real-time while handling any exceptions that may occur during the streaming process.
 
        Args:
@@ -100,25 +104,25 @@ class BotClient(object):
            for chunk in response:
                if not chunk.choices:
                    continue
-
+
                # Convert OpenAI response to compatible format
                yield chunk.model_dump()
 
        except Exception as e:
-            self.logger.error("Stream request failed: {}"
+            self.logger.error(f"Stream request failed: {e}")
            raise
 
    def process(
-        self,
-        model_name: str,
-        req_data: dict,
-        max_tokens: int=2048,
-        temperature: float=1.0,
-        top_p: float=0.7
+        self,
+        model_name: str,
+        req_data: dict,
+        max_tokens: int = 2048,
+        temperature: float = 1.0,
+        top_p: float = 0.7,
    ) -> dict:
        """
-        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
-        including token limits and sampling settings, truncating messages to fit character limits, making API calls
+        Handles chat completion requests by mapping the model name to its endpoint, preparing request parameters
+        including token limits and sampling settings, truncating messages to fit character limits, making API calls
        with built-in retry mechanism, and logging the full request/response cycle for debugging purposes.
 
        Args:
@@ -140,7 +144,7 @@ class BotClient(object):
        req_data["messages"] = self.truncate_messages(req_data["messages"])
        for _ in range(self.max_retry_num):
            try:
-                self.logger.info("[MODEL] {}"
+                self.logger.info(f"[MODEL] {model_url}")
                self.logger.info("[req_data]====>")
                self.logger.info(json.dumps(req_data, ensure_ascii=False))
                res = self.call_back(model_url, req_data)
@@ -153,15 +157,16 @@ class BotClient(object):
                res = {}
            if len(res) != 0 and "error" not in res:
                break
-
+
        return res
 
    def process_stream(
-        self,
-
-
-
-
+        self,
+        model_name: str,
+        req_data: dict,
+        max_tokens: int = 2048,
+        temperature: float = 1.0,
+        top_p: float = 0.7,
    ) -> dict:
        """
        Processes streaming requests by mapping the model name to its endpoint, configuring request parameters,
@@ -184,29 +189,30 @@ class BotClient(object):
        req_data["temperature"] = temperature
        req_data["top_p"] = top_p
        req_data["messages"] = self.truncate_messages(req_data["messages"])
-
+
        last_error = None
        for _ in range(self.max_retry_num):
            try:
-                self.logger.info("[MODEL] {}"
+                self.logger.info(f"[MODEL] {model_url}")
                self.logger.info("[req_data]====>")
                self.logger.info(json.dumps(req_data, ensure_ascii=False))
-
-
-                yield chunk
+
+                yield from self.call_back_stream(model_url, req_data)
                return
-
+
            except Exception as e:
                last_error = e
-                self.logger.error(
-
+                self.logger.error(
+                    f"Stream request failed (attempt {_ + 1}/{self.max_retry_num}): {e}"
+                )
+
        self.logger.error("All retry attempts failed for stream request")
        yield {"error": str(last_error)}
 
    def cut_chinese_english(self, text: str) -> list:
        """
-        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
-        while preserving English words as whole units, with special handling for Unicode character ranges
+        Segments mixed Chinese and English text into individual components using Jieba for Chinese words
+        while preserving English words as whole units, with special handling for Unicode character ranges
        to distinguish between the two languages.
 
        Args:
@@ -219,7 +225,9 @@ class BotClient(object):
        en_ch_words = []
 
        for word in words:
-            if word.isalpha() and not any(
+            if word.isalpha() and not any(
+                "\u4e00" <= char <= "\u9fff" for char in word
+            ):
                en_ch_words.append(word)
            else:
                en_ch_words.extend(list(word))
@@ -239,10 +247,10 @@ class BotClient(object):
        """
        if not messages:
            return messages
-
+
        processed = []
        total_units = 0
-
+
        for msg in messages:
            # Handle two different content formats
            if isinstance(msg["content"], str):
@@ -251,31 +259,33 @@ class BotClient(object):
                text_content = msg["content"][1]["text"]
            else:
                text_content = ""
-
+
            # Calculate unit count after tokenization
            units = self.cut_chinese_english(text_content)
            unit_count = len(units)
-
-            processed.append(
-
-
-
-
-
-
+
+            processed.append(
+                {
+                    "role": msg["role"],
+                    "original_content": msg["content"],  # Preserve original content
+                    "text_content": text_content,  # Extracted plain text
+                    "units": units,
+                    "unit_count": unit_count,
+                }
+            )
            total_units += unit_count
-
+
        if total_units <= self.max_char:
            return messages
-
+
        # Number of units to remove
        to_remove = total_units - self.max_char
-
+
        # 1. Truncate historical messages
        for i in range(len(processed) - 1, 1):
            if to_remove <= 0:
                break
-
+
            # current = processed[i]
            if processed[i]["unit_count"] <= to_remove:
                processed[i]["text_content"] = ""
@@ -293,7 +303,7 @@ class BotClient(object):
                elif isinstance(processed[i]["original_content"], list):
                    processed[i]["original_content"][1]["text"] = new_text
                to_remove = 0
-
+
        # 2. Truncate system message
        if to_remove > 0:
            system_msg = processed[0]
@@ -313,7 +323,7 @@ class BotClient(object):
            elif isinstance(processed[0]["original_content"], list):
                processed[0]["original_content"][1]["text"] = new_text
            to_remove = 0
-
+
        # 3. Truncate last message
        if to_remove > 0 and len(processed) > 1:
            last_msg = processed[-1]
@@ -331,15 +341,12 @@ class BotClient(object):
                last_msg["original_content"] = ""
            elif isinstance(last_msg["original_content"], list):
                last_msg["original_content"][1]["text"] = ""
-
+
        result = []
        for msg in processed:
            if msg["text_content"]:
-                result.append({
-
-                    "content": msg["original_content"]
-                })
-
+                result.append({"role": msg["role"], "content": msg["original_content"]})
+
        return result
 
    def embed_fn(self, text: str) -> list:
@@ -352,7 +359,9 @@ class BotClient(object):
        Returns:
            list: A list of floats representing the embedding.
        """
-        client = OpenAI(
+        client = OpenAI(
+            base_url=self.embedding_service_url, api_key=self.qianfan_api_key
+        )
        response = client.embeddings.create(input=[text], model=self.embedding_model)
        return response.data[0].embedding
 
@@ -368,7 +377,7 @@ class BotClient(object):
        """
        headers = {
            "Authorization": "Bearer " + self.qianfan_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
        }
 
        results = []
@@ -376,9 +385,11 @@ class BotClient(object):
        for query in query_list:
            payload = {
                "messages": [{"role": "user", "content": query}],
-                "resource_type_filter": [{"type": "web", "top_k": top_k}]
+                "resource_type_filter": [{"type": "web", "top_k": top_k}],
            }
-            response = requests.post(
+            response = requests.post(
+                self.web_search_service_url, headers=headers, json=payload
+            )
 
            if response.status_code == 200:
                response = response.json()
@@ -387,4 +398,4 @@ class BotClient(object):
            else:
                self.logger.info(f"请求失败,状态码: {response.status_code}")
                self.logger.info(response.text)
-        return results
+        return results
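One easy-to-miss detail in the new cut_chinese_english guard: Python's str.isalpha() returns True for CJK characters as well ("支持".isalpha() is True), so the explicit \u4e00-\u9fff range test is what actually routes Chinese tokens to per-character splitting. A small sketch of the rule, assuming jieba.lcut for the tokenization step that the diff does not show:

    import jieba

    def cut_chinese_english(text: str) -> list:
        words = jieba.lcut(text)  # assumed tokenizer; the diff only shows the loop
        en_ch_words = []
        for word in words:
            # Keep pure-English tokens whole; split anything containing CJK characters.
            if word.isalpha() and not any("\u4e00" <= char <= "\u9fff" for char in word):
                en_ch_words.append(word)
            else:
                en_ch_words.extend(list(word))
        return en_ch_words

    print(cut_chinese_english("ERNIE 支持 web search"))
    # e.g. ['ERNIE', ' ', '支', '持', ' ', 'web', ' ', 'search']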
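The process_stream rewrite replaces a manual chunk loop with yield from inside the retry loop: a clean finish hits the return, while an exception raised at any point in the stream lands in the except block and triggers another attempt. A self-contained sketch of that control flow (flaky_stream stands in for call_back_stream):

    import logging

    logging.basicConfig(level=logging.ERROR)
    logger = logging.getLogger(__name__)
    MAX_RETRY_NUM = 3

    def flaky_stream(should_fail: bool):
        if should_fail:
            raise RuntimeError("upstream error")
        yield {"delta": "hello"}
        yield {"delta": "world"}

    def process_stream(fail_times: int = 1):
        last_error = None
        for attempt in range(MAX_RETRY_NUM):
            try:
                # yield from forwards every chunk to the caller; a mid-stream
                # exception is caught below and the whole stream is retried.
                yield from flaky_stream(attempt < fail_times)
                return  # stream finished cleanly, stop retrying
            except Exception as e:
                last_error = e
                logger.error(f"Stream request failed (attempt {attempt + 1}/{MAX_RETRY_NUM}): {e}")
        yield {"error": str(last_error)}

    print(list(process_stream()))  # [{'delta': 'hello'}, {'delta': 'world'}]

One consequence of retrying a generator this way: if the upstream stream dies after some chunks were already yielded, the retry replays from the beginning, so callers can see a duplicated prefix.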