Update app.py

app.py CHANGED
@@ -20,12 +20,12 @@ import json

 import base64

-def
-with open(
-return
+def get_svg_content(svg_path):
+    with open(svg_path, "r", encoding="utf-8") as f:
+        return f.read()

 # Load once at the top of the file
-
+plobin_logo_svg = get_svg_content("img/plobin.svg")

 # Load environment variables
 load_dotenv()
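Reading the SVG once at import time avoids re-reading the file on every Streamlit rerun. How the cached string is rendered is outside this diff; a sketch of one plausible call site, where the wrapper markup is an assumption:

```python
# Hypothetical call site for the module-level SVG string; not shown in this diff.
st.markdown(
    f'<div style="text-align: center;">{plobin_logo_svg}</div>',
    unsafe_allow_html=True,  # needed so Streamlit renders the raw <svg> markup
)
```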
@@ -52,13 +52,25 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )

+# Sidebar - original palette
+#667eea 0%,
+#764ba2 100%);
+
+# Sidebar - option 1
+#5ECFFF 0%,
+#B8FF6E 100%);
+
+# Sidebar - option 2
+#258CFF 0%,
+#0A1E6A 100%);
+
 # Custom CSS
 st.markdown("""
 <style>
 [data-testid="stSidebar"] {
     background: linear-gradient(180deg,
-        #667eea 0%,
-        #764ba2 100%);
+        #90B9E8 0%,
+        #B3BEC9 100%);
     box-shadow: 4px 0 30px rgba(0,0,0,0.2);
     width: 290px !important;
 }
@@ -340,7 +352,7 @@ st.markdown("""

     /* Chat title style (animation removed) */
     .chat-title {
-        color:
+        color: black !important;
         font-weight: 900 !important;
         font-size: 1.75rem !important;
         margin-bottom: 1rem !important;
@@ -439,8 +451,8 @@ def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]
     pages_text = {}

     # ==================== Revised chunk settings ====================
-    CHUNK_SIZE = 300
-    OVERLAP_SIZE = 60
+    CHUNK_SIZE = 800    # raised from 300
+    OVERLAP_SIZE = 150  # raised from 60
     # ========================================================

     for page_num in range(len(doc)):
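For context, these two constants drive a sliding character window over each page's text; the loop itself sits outside this hunk. A minimal sketch of such a window, assuming a stride of CHUNK_SIZE - OVERLAP_SIZE (the function name and body are illustrative, not app.py's actual code):

```python
def chunk_text(text: str, chunk_size: int = 800, overlap: int = 150) -> list:
    """Illustrative sliding-window chunker: 800-char windows, 150-char overlap."""
    step = chunk_size - overlap  # each new chunk starts 650 chars after the last
    chunks = []
    for start in range(0, max(len(text) - overlap, 1), step):
        chunks.append(text[start:start + chunk_size])
    return chunks
```

Raising the window from 300 to 800 characters keeps whole clauses of a tender document inside one chunk, at the cost of coarser retrieval granularity.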
@@ -539,35 +551,58 @@ def create_vector_db(chunks: List[str], metadata_list: List[Dict]):
     return collection, embedder


-
-
+# ==================== Semantic keyword extraction (hardcoded lists removed) ====================
+def extract_keywords_semantic(text: str, embedder, top_n: int = 5) -> List[str]:
+    """
+    Semantic keyword extraction - no stopword list needed.
+
+    How it works:
+    1. Words containing digits are always treated as important
+    2. Compute the semantic similarity between each word and the source text
+    3. Score each word by similarity x frequency
+    4. Return the top keywords by score
+    """
+    # 1. Always include words containing digits (amounts, dates, quantities, etc.)
     words_with_numbers = re.findall(r'[가-힣]*\d+[가-힣]*', text)
-    words = re.findall(r'[가-힣]{2,}', text)
-
-    stopwords = {
-        '것', '등', '및', '그', '이', '저', '수', '때', '중', '내', '더', '위', '일',
-        '경우', '대한', '통해', '위해', '관련', '있는', '하는', '되는', '이런', '저런',
-        '어떤', '무슨', '어느', '누구', '언제', '어디', '무엇', '어떻게', '왜',
-        '알려', '설명', '말해', '대해', '관하여', '있나요', '인가요', '무엇인가요',
-        '얼마', '입니까', '합니까'
-    }

-
-    important_keywords = {
-        '공사', '용역', '제안', '입찰', '낙찰', '견적', '단가'
-    }

-
-

-
-        if word in important_keywords:
-            word_freq[word] += 5

+    # 2. Extract candidate words (2+ Hangul characters)
+    candidate_words = re.findall(r'[가-힣]{2,}', text)
+
+    if not candidate_words:
+        return words_with_numbers[:top_n]
+
+    word_freq = Counter(candidate_words)
+
+    # 3. Compute the semantic similarity between the source text and each word
+    text_embedding = embedder.encode([text], convert_to_numpy=True)[0]
+    word_embeddings = embedder.encode(list(word_freq.keys()), convert_to_numpy=True)
+
+    # Cosine similarity
+    similarities = util.cos_sim(text_embedding, word_embeddings)[0].numpy()
+
+    # 4. Score = semantic similarity x frequency (log-scaled)
+    scored_words = []
+    for idx, (word, freq) in enumerate(word_freq.items()):
+        # 70% semantic similarity + 30% frequency
+        semantic_score = similarities[idx]
+        frequency_score = np.log1p(freq) / 10.0  # log-scale the count, then normalize
+
+        combined_score = 0.7 * semantic_score + 0.3 * frequency_score
+        scored_words.append((word, combined_score))
+
+    # 5. Sort by score
+    scored_words.sort(key=lambda x: x[1], reverse=True)
+
+    # 6. Combine results: digit-bearing words first, then high-scoring words
     result = []
-    result.extend([w for w in words_with_numbers if w])

-
+    # Add digit-bearing words first (up to 3)
+    for word in words_with_numbers[:3]:
+        if word and word not in result:
+            result.append(word)
+
+    # Fill the rest by semantic score
+    for word, score in scored_words:
         if word not in result:
             result.append(word)
             if len(result) >= top_n:
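A usage sketch of the new helper, for reference. It assumes `embedder` is the SentenceTransformer instance app.py already creates elsewhere, and that `util` (used for `cos_sim` above) is `sentence_transformers.util`; the model name below is a placeholder, not necessarily the one app.py loads:

```python
from sentence_transformers import SentenceTransformer

# Placeholder model; app.py constructs its embedder elsewhere in the file.
embedder = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")

query = "2024년 입찰 낙찰 단가는 얼마인가요?"
print(extract_keywords_semantic(query, embedder, top_n=5))
# Digit-bearing tokens such as '2024년' come first, then semantically central words.
```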
@@ -576,9 +611,9 @@ def extract_keywords(text: str, top_n: int = 5) -> List[str]:
     return result[:top_n]


-# ====================
+# ==================== Hybrid search (uses semantic keywords) ====================
 def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
-    """Hybrid search: vector similarity + keyword matching"""
+    """Hybrid search: vector similarity + semantic keyword matching"""
     # 1. Vector search
     query_embedding = embedder.encode([query], convert_to_numpy=True)[0]
     vector_results = collection.query(
@@ -587,8 +622,8 @@ def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
         include=["documents", "metadatas", "distances"]
     )

-    # 2. Keyword extraction
-    keywords =
+    # 2. Semantic keyword extraction (hardcoded lists removed)
+    keywords = extract_keywords_semantic(query, embedder, top_n=5)

     # 3. Compute hybrid scores
     hybrid_results = []
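The loop that fills `hybrid_results` is unchanged between this hunk and the next, so the diff elides it. A hypothetical reconstruction of that step for orientation - only the `hybrid_score`/`keyword_score` keys and the sort below are confirmed by the diff; the weights and the distance-to-score mapping are assumptions:

```python
from typing import Dict, List

def score_chunks(docs: List[str], dists: List[float], keywords: List[str]) -> List[Dict]:
    """Illustrative hybrid scoring; app.py's real weights may differ."""
    hybrid_results = []
    for doc_text, dist in zip(docs, dists):
        vector_score = 1.0 / (1.0 + dist)                # smaller distance -> higher score
        matched = sum(1 for kw in keywords if kw in doc_text)
        keyword_score = matched / max(len(keywords), 1)  # share of query keywords present
        hybrid_results.append({
            'document': doc_text,
            'hybrid_score': 0.7 * vector_score + 0.3 * keyword_score,
            'keyword_score': keyword_score,
        })
    return hybrid_results
```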
@@ -617,7 +652,7 @@ def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
             'keyword_score': keyword_score
         })

-    # 4. Sort by score, then take the top
+    # 4. Sort by score, then take the top k
     hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)
     top_results = hybrid_results[:top_k]
@@ -746,7 +781,7 @@ def generate_answer(query: str, search_results: Dict, api_key: str) -> str:
 2. **Mixed-language and malformed input**: The user's sentence may mix Korean and English or contain grammatical errors, so infer the intent and interpret it accurately.
 3. **Clarifying ambiguous questions**: Even if the user's question is imprecise or ambiguous, infer the intent and restate it appropriately.
 **Document-grounded answering principles (guessing strictly forbidden):**
-1. Read the provided document **very
+1. Read the provided document **very carefully** and find the exact information
 2. **Always ground the answer in the document**; never invent content the document lacks - state **"I cannot find the relevant information in the document"** instead
 3. For **questions entirely unrelated to the document** (e.g., lunch recommendations, weather, small talk), reply only **"Sorry, the provided document does not contain information related to that question."** and stop without further explanation
 4. Never claim information "exists" when the document does not contain it
@@ -938,16 +973,22 @@ def main():

     # Header - shown only before a document is processed
     if not st.session_state.processed:
-
-
-
-
-
-
+        # Center the logo
+        col1, col2, col3 = st.columns([1, 1, 1])
+        with col2:
+            st.image("img/plobin.svg", use_container_width=True)
+        st.text(' ')
+
+        # Subtitle
+        # st.markdown("""
+        # <div style="text-align: center; margin-top: 10px;">
+        #     An AI assistant that finds answers in your documents
+        # </div>
+        # """, unsafe_allow_html=True)

     # ========== Sidebar ==========
     with st.sidebar:
-        st.image("img/plobin.png", width=
+        st.image("img/plobin-left-only.png", width=30)  # width set directly in pixels
         # st.title("🔮 PLOBIN")

         uploaded_file = st.file_uploader(
@@ -993,28 +1034,11 @@ def main():
             except Exception as e:
                 st.error(f"Error: {str(e)}")

-        # ==================== Change: chunk display removed ====================
         # Show document info (chunk details excluded)
         if st.session_state.processed:
             st.markdown("#### 📄 Document info")
             st.info(f"📄 **{st.session_state.doc_metadata['filename']}**")
             st.info(f"📄 Pages: {st.session_state.doc_metadata['pages']}")
-            # Chunk display removed
-            # ============================================================
-
-        # st.divider()
-
-        # Reset button
-        # if st.button("🔄 Upload a new document", use_container_width=True):
-        #     st.session_state.processed = False
-        #     st.session_state.vector_db = None
-        #     st.session_state.embedder = None
-        #     st.session_state.chat_history = []
-        #     st.session_state.current_highlights = []
-        #     st.session_state.pdf_bytes = None
-        #     st.session_state.pdf_pages_text = {}
-        #     st.session_state.zoom_level = 2.0
-        #     st.rerun()

     # ===== When no document has been processed yet
     if not st.session_state.processed:
@@ -1175,7 +1199,7 @@ def main():

         with st.spinner("🔮 PLOBIN is searching..."):
             try:
-                # 1. Hybrid search (vector + keywords) - top 3
+                # 1. Hybrid search (vector + semantic keywords) - top 3
                 search_results = hybrid_search(
                     query,
                     st.session_state.vector_db,