Spaces:

anhkhoiphan
/

Kumiko_v2.0

Sleeping

App Files Community

anhkhoiphan committed on Oct 10

Commit

ca4a5bd

1 Parent(s): 5e9162a

Revert lại hoàn toàn phần image captioning

Browse files

Files changed (4) hide show

README.md +3 -3
app.py +45 -82
image_captioning.py +59 -59
requirements.txt +9 -9

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
 title: Kumiko V2.0
-emoji: 🏃
-colorFrom: green
-colorTo: gray
 sdk: docker
 pinned: false
 license: mit

 ---
 title: Kumiko V2.0
+emoji: 🏆
+colorFrom: blue
+colorTo: yellow
 sdk: docker
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Chainlit UI for AI Agent with Login/Registration and Image Support
 """
 import chainlit as cl
@@ -7,8 +7,6 @@ import requests
 from typing import Optional, Dict
 import json
 import os
-import asyncio
-from image_captioning import image_captioning
 # API Configuration
 API_BASE_URL = os.getenv("API_BASE_URL")
@@ -39,7 +37,7 @@ def register_user(user_id: str) -> Dict:
         if response.status_code == 201:
             return {"success": True, "message": response.json().get("message", "Đăng ký thành công!")}
         else:
-            error_msg = response.json().get("detail", "Đăng ký thất bại. Hãy thử user_id khác!")
             return {"success": False, "message": error_msg}
     except Exception as e:
         return {"success": False, "message": f"Lỗi kết nối: {str(e)}"}
@@ -50,8 +48,7 @@ def send_chat_message(query: str, user_id: str) -> Dict:
     try:
         response = requests.post(
             f"{API_BASE_URL}/chat",
-            json={"query": query, "user_id": user_id},
-            timeout=180
         )
         if response.status_code == 200:
             data = response.json()
@@ -63,27 +60,10 @@ def send_chat_message(query: str, user_id: str) -> Dict:
         else:
             error_msg = response.json().get("detail", "Lỗi xử lý câu hỏi")
             return {"success": False, "message": error_msg}
-    except requests.exceptions.Timeout:
-        return {"success": False, "message": "⏱️ Server đang khởi động (cold start), vui lòng thử lại sau 30 giây!"}
     except Exception as e:
         return {"success": False, "message": f"Lỗi kết nối API: {str(e)}"}
-# ============================================
-# IMAGE PROCESSING
-# ============================================
-async def process_image(image_path: str) -> str:
-    """Process image and return caption"""
-    try:
-        loop = asyncio.get_event_loop()
-        caption = await loop.run_in_executor(None, image_captioning, image_path)
-        return caption
-    except Exception as e:
-        return f"(Lỗi khi xử lý ảnh: {str(e)})"
 # ============================================
 # CHAINLIT EVENT HANDLERS
 # ============================================
@@ -91,6 +71,7 @@ async def process_image(image_path: str) -> str:
 @cl.on_chat_start
 async def start():
     """Initialize chat session"""
     user_id = cl.user_session.get("user_id")
     if not user_id:
@@ -152,10 +133,12 @@ async def main(message: cl.Message):
     content = message.content.strip()
     if content.startswith("/"):
         await handle_command(content)
         return
     if not user_id or awaiting_auth:
         await cl.Message(
             content="⚠️ Vui lòng đăng nhập trước khi chat.\n\nGõ `/login <user_id>` hoặc `/register <user_id>`",
@@ -163,47 +146,8 @@ async def main(message: cl.Message):
         ).send()
         return
-    await process_chat_message_with_image(message, user_id)
-async def process_chat_message_with_image(message: cl.Message, user_id: str):
-    """Process chat message with optional image"""
-    query = message.content.strip() if message.content else ""
-    images = [file for file in message.elements if file.mime and file.mime.startswith("image/")]
-    if images:
-        processing_msg = cl.Message(content="🖼️ Đang xử lý ảnh...", author="Kumiko")
-        await processing_msg.send()
-        image_captions = []
-        for img in images:
-            try:
-                caption = await process_image(img.path)
-                image_captions.append(caption)
-            except Exception as e:
-                image_captions.append(f"(Lỗi khi xử lý ảnh: {str(e)})")
-        await processing_msg.remove()
-        combined_captions = "\n\n".join(image_captions)
-        if query:
-            final_query = f"{query}\n\n{combined_captions}"
-        else:
-            final_query = combined_captions
-    else:
-        final_query = query
-    if not final_query:
-        await cl.Message(
-            content="⚠️ Vui lòng nhập câu hỏi hoặc gửi ảnh!",
-            author="Kumiko"
-        ).send()
-        return
-    await process_chat_message(final_query, user_id)
 async def handle_command(command: str):
@@ -211,6 +155,7 @@ async def handle_command(command: str):
     parts = command.split(maxsplit=1)
     cmd = parts[0].lower()
     if cmd == "/login":
         if len(parts) < 2:
             await cl.Message(content="❌ Sử dụng: `/login <user_id>`", author="Kumiko").send()
@@ -222,22 +167,22 @@ async def handle_command(command: str):
             await cl.Message(content="❌ User ID phải có ít nhất 3 ký tự!", author="Kumiko").send()
             return
         loading_msg = cl.Message(content="🔍 Đang kiểm tra tài khoản...", author="Kumiko")
         await loading_msg.send()
-        loop = asyncio.get_event_loop()
-        exists = await loop.run_in_executor(None, check_user_exists, user_id)
-        await loading_msg.remove()
-        if exists:
             await show_chat_interface(user_id)
         else:
             await cl.Message(
                 content=f"❌ User ID `{user_id}` không tồn tại!\n\nVui lòng đăng ký bằng: `/register {user_id}`",
                 author="Kumiko"
             ).send()
     elif cmd == "/register":
         if len(parts) < 2:
             await cl.Message(content="❌ Sử dụng: `/register <user_id>`", author="Kumiko").send()
@@ -249,12 +194,12 @@ async def handle_command(command: str):
             await cl.Message(content="❌ User ID phải có ít nhất 3 ký tự!", author="Kumiko").send()
             return
         loading_msg = cl.Message(content="📝 Đang đăng ký tài khoản...", author="Kumiko")
         await loading_msg.send()
-        loop = asyncio.get_event_loop()
-        result = await loop.run_in_executor(None, register_user, user_id)
         await loading_msg.remove()
         if result["success"]:
@@ -263,12 +208,14 @@ async def handle_command(command: str):
         else:
             await cl.Message(content=f"❌ {result['message']}", author="Kumiko").send()
     elif cmd == "/logout":
         cl.user_session.set("user_id", None)
         cl.user_session.set("awaiting_auth", True)
         await cl.Message(content="👋 Đã đăng xuất thành công!", author="Kumiko").send()
         await show_login_screen()
     elif cmd == "/help":
         help_msg = """
 #### 📖 Hướng dẫn sử dụng
@@ -282,12 +229,7 @@ async def handle_command(command: str):
 - `/help` - Hiển thị hướng dẫn này
 #### Chat:
-Sau khi đăng nhập, bạn có thể:
-- Chat bình thường với text
-- **Gửi ảnh** (kèm hoặc không kèm text) để mình phân tích
-- Gửi nhiều ảnh cùng lúc
-Hệ thống sẽ trả lời và cung cấp tài liệu tham khảo.
 """
         await cl.Message(content=help_msg, author="Kumiko").send()
@@ -297,12 +239,14 @@ Hệ thống sẽ trả lời và cung cấp tài liệu tham khảo.
 async def process_chat_message(query: str, user_id: str):
     """Process chat message and display response"""
     thinking_msg = cl.Message(content="🤔 Đang suy nghĩ...", author="Kumiko")
     await thinking_msg.send()
-    loop = asyncio.get_event_loop()
-    result = await loop.run_in_executor(None, send_chat_message, query, user_id)
     await thinking_msg.remove()
     if not result["success"]:
@@ -312,8 +256,10 @@ async def process_chat_message(query: str, user_id: str):
     answer = result["answer"]
     references = result["references"]
     cl.user_session.set("last_references", references)
     actions = [
         cl.Action(
             name="show_references",
@@ -324,6 +270,7 @@ async def process_chat_message(query: str, user_id: str):
         )
     ]
     await cl.Message(
         content=answer,
         actions=actions,
@@ -334,13 +281,16 @@ async def process_chat_message(query: str, user_id: str):
 @cl.action_callback("show_references")
 async def on_show_references(action: cl.Action):
     """Handle reference button click"""
     references = action.payload.get("references", "Không có tài liệu tham khảo")
     await cl.Message(
         content=references,
         author="Kumiko"
     ).send()
     await action.remove()
@@ -371,4 +321,17 @@ async def set_starters():
             label="Trợ giúp",
             message="/help",
         ),
-    ]

 """
+Chainlit UI for AI Agent with Login/Registration
 """
 import chainlit as cl
 from typing import Optional, Dict
 import json
 import os
 # API Configuration
 API_BASE_URL = os.getenv("API_BASE_URL")
         if response.status_code == 201:
             return {"success": True, "message": response.json().get("message", "Đăng ký thành công!")}
         else:
+            error_msg = response.json().get("detail", "Đăng ký thất bại")
             return {"success": False, "message": error_msg}
     except Exception as e:
         return {"success": False, "message": f"Lỗi kết nối: {str(e)}"}
     try:
         response = requests.post(
             f"{API_BASE_URL}/chat",
+            json={"query": query, "user_id": user_id}
         )
         if response.status_code == 200:
             data = response.json()
         else:
             error_msg = response.json().get("detail", "Lỗi xử lý câu hỏi")
             return {"success": False, "message": error_msg}
     except Exception as e:
         return {"success": False, "message": f"Lỗi kết nối API: {str(e)}"}
 # ============================================
 # CHAINLIT EVENT HANDLERS
 # ============================================
 @cl.on_chat_start
 async def start():
     """Initialize chat session"""
+    # Check if user is already logged in
     user_id = cl.user_session.get("user_id")
     if not user_id:
     content = message.content.strip()
+    # Handle authentication commands
     if content.startswith("/"):
         await handle_command(content)
         return
+    # Check if user is logged in
     if not user_id or awaiting_auth:
         await cl.Message(
             content="⚠️ Vui lòng đăng nhập trước khi chat.\n\nGõ `/login <user_id>` hoặc `/register <user_id>`",
         ).send()
         return
+    # Process chat message
+    await process_chat_message(content, user_id)
 async def handle_command(command: str):
     parts = command.split(maxsplit=1)
     cmd = parts[0].lower()
+    # Login command
     if cmd == "/login":
         if len(parts) < 2:
             await cl.Message(content="❌ Sử dụng: `/login <user_id>`", author="Kumiko").send()
             await cl.Message(content="❌ User ID phải có ít nhất 3 ký tự!", author="Kumiko").send()
             return
+        # Show loading message
         loading_msg = cl.Message(content="🔍 Đang kiểm tra tài khoản...", author="Kumiko")
         await loading_msg.send()
+        # Check if user exists
+        if check_user_exists(user_id):
+            await loading_msg.remove()
             await show_chat_interface(user_id)
         else:
+            await loading_msg.remove()
             await cl.Message(
                 content=f"❌ User ID `{user_id}` không tồn tại!\n\nVui lòng đăng ký bằng: `/register {user_id}`",
                 author="Kumiko"
             ).send()
+    # Register command
     elif cmd == "/register":
         if len(parts) < 2:
             await cl.Message(content="❌ Sử dụng: `/register <user_id>`", author="Kumiko").send()
             await cl.Message(content="❌ User ID phải có ít nhất 3 ký tự!", author="Kumiko").send()
             return
+        # Show loading message
         loading_msg = cl.Message(content="📝 Đang đăng ký tài khoản...", author="Kumiko")
         await loading_msg.send()
+        # Register user
+        result = register_user(user_id)
         await loading_msg.remove()
         if result["success"]:
         else:
             await cl.Message(content=f"❌ {result['message']}", author="Kumiko").send()
+    # Logout command
     elif cmd == "/logout":
         cl.user_session.set("user_id", None)
         cl.user_session.set("awaiting_auth", True)
         await cl.Message(content="👋 Đã đăng xuất thành công!", author="Kumiko").send()
         await show_login_screen()
+    # Help command
     elif cmd == "/help":
         help_msg = """
 #### 📖 Hướng dẫn sử dụng
 - `/help` - Hiển thị hướng dẫn này
 #### Chat:
+Sau khi đăng nhập, bạn có thể chat bình thường. Hệ thống sẽ trả lời và cung cấp tài liệu tham khảo.
 """
         await cl.Message(content=help_msg, author="Kumiko").send()
 async def process_chat_message(query: str, user_id: str):
     """Process chat message and display response"""
+    # Show thinking message
     thinking_msg = cl.Message(content="🤔 Đang suy nghĩ...", author="Kumiko")
     await thinking_msg.send()
+    # Send request to API
+    result = send_chat_message(query, user_id)
+    # Remove thinking message
     await thinking_msg.remove()
     if not result["success"]:
     answer = result["answer"]
     references = result["references"]
+    # Store references in session for the button
     cl.user_session.set("last_references", references)
+    # Create message with action button
     actions = [
         cl.Action(
             name="show_references",
         )
     ]
+    # Send answer with reference button
     await cl.Message(
         content=answer,
         actions=actions,
 @cl.action_callback("show_references")
 async def on_show_references(action: cl.Action):
     """Handle reference button click"""
+    # Get references from action payload
     references = action.payload.get("references", "Không có tài liệu tham khảo")
+    # Send references as a new message
     await cl.Message(
         content=references,
         author="Kumiko"
     ).send()
+    # Optional: Remove the action button after clicking
     await action.remove()
             label="Trợ giúp",
             message="/help",
         ),
+    ]
+# Custom UI settings (optional - save as .chainlit/config.toml)
+# """
+# [UI]
+# name = "AI Agent Chat"
+# default_collapse_content = true
+# default_expand_messages = false
+# hide_cot = false
+# [UI.theme]
+# primary_color = "#2563eb"
+# """

image_captioning.py CHANGED Viewed

@@ -1,74 +1,74 @@
-def image_captioning(image_path, prompt=None):
-    """
-    Tạo caption tiếng Anh cho ảnh sử dụng SmolVLM2 500M
-    Args:
-        image_path: Đường dẫn đến file ảnh
-        prompt: Custom prompt cho model
-    Returns:
-        String caption với format "(Người dùng gửi ảnh với nội dung: ...)"
-    """
-    from transformers import AutoProcessor, AutoModelForImageTextToText
-    import torch
-    if prompt is None:
-        prompt = """Describe this image in detail, including:
-- People: count, appearance, clothing, actions
-- Famous individuals or celebrities (if any)
-- Objects and notable items
-- Location or famous landmarks (if recognizable)
-- Overall scene and atmosphere
-- Main focus/subject of the image"""
-    model_name = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
-    # ✅ Phải có trust_remote_code=True
-    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model = AutoModelForImageTextToText.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
-        _attn_implementation="sdpa",  # ✅ Tương thích, không cần flash-attn
-        trust_remote_code=True
-    ).to(device)
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "image", "path": image_path},
-                {"type": "text", "text": prompt}
-            ]
-        }
-    ]
-    inputs = processor.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=True,
-        return_dict=True,
-        return_tensors="pt",
-    ).to(model.device, dtype=torch.bfloat16)
-    generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=128)
-    generated_texts = processor.batch_decode(
-        generated_ids,
-        skip_special_tokens=True,
-    )
-    raw_output = generated_texts[0].strip()
-    # ✅ Cắt bỏ phần prompt "User: ... Assistant:" nếu có
-    if "Assistant:" in raw_output:
-        caption = raw_output.split("Assistant:")[-1].strip()
-    else:
-        caption = raw_output
-    # 🪶 Chuẩn hóa xuống 1 dòng (loại bỏ \n thừa)
-    caption = " ".join(caption.split())
-    return f"(Người dùng gửi ảnh với nội dung: {caption})"

+# def image_captioning(image_path, prompt=None):
+#     """
+#     Tạo caption tiếng Anh cho ảnh sử dụng SmolVLM2 500M
+#     Args:
+#         image_path: Đường dẫn đến file ảnh
+#         prompt: Custom prompt cho model
+#     Returns:
+#         String caption với format "(Người dùng gửi ảnh với nội dung: ...)"
+#     """
+#     from transformers import AutoProcessor, AutoModelForImageTextToText
+#     import torch
+#     if prompt is None:
+#         prompt = """Describe this image in detail, including:
+# - People: count, appearance, clothing, actions
+# - Famous individuals or celebrities (if any)
+# - Objects and notable items
+# - Location or famous landmarks (if recognizable)
+# - Overall scene and atmosphere
+# - Main focus/subject of the image"""
+#     model_name = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
+#     # ✅ Phải có trust_remote_code=True
+#     processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+#     device = "cuda" if torch.cuda.is_available() else "cpu"
+#     model = AutoModelForImageTextToText.from_pretrained(
+#         model_name,
+#         torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
+#         _attn_implementation="sdpa",  # ✅ Tương thích, không cần flash-attn
+#         trust_remote_code=True
+#     ).to(device)
+#     messages = [
+#         {
+#             "role": "user",
+#             "content": [
+#                 {"type": "image", "path": image_path},
+#                 {"type": "text", "text": prompt}
+#             ]
+#         }
+#     ]
+#     inputs = processor.apply_chat_template(
+#         messages,
+#         add_generation_prompt=True,
+#         tokenize=True,
+#         return_dict=True,
+#         return_tensors="pt",
+#     ).to(model.device, dtype=torch.bfloat16)
+#     generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=128)
+#     generated_texts = processor.batch_decode(
+#         generated_ids,
+#         skip_special_tokens=True,
+#     )
+#     raw_output = generated_texts[0].strip()
+#     # ✅ Cắt bỏ phần prompt "User: ... Assistant:" nếu có
+#     if "Assistant:" in raw_output:
+#         caption = raw_output.split("Assistant:")[-1].strip()
+#     else:
+#         caption = raw_output
+#     # 🪶 Chuẩn hóa xuống 1 dòng (loại bỏ \n thừa)
+#     caption = " ".join(caption.split())
+#     return f"(Người dùng gửi ảnh với nội dung: {caption})"

requirements.txt CHANGED Viewed

@@ -3,12 +3,12 @@ requests
 websockets
 # Dùng xử lý ảnh
-transformers>=4.45.0
-pillow
-torch
-accelerate
-sentencepiece
-protobuf
-timm
-decord
-num2words

 websockets
 # Dùng xử lý ảnh
+# transformers>=4.45.0
+# pillow
+# torch
+# accelerate
+# sentencepiece
+# protobuf
+# timm
+# decord
+# num2words