dohyune committed on
Commit
9807dd4
·
verified ·
1 Parent(s): cefe786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -62
app.py CHANGED
@@ -20,12 +20,12 @@ import json
20
 
21
  import base64
22
 
23
- def get_image_base64(image_path):
24
- with open(image_path, "rb") as img_file:
25
- return base64.b64encode(img_file.read()).decode()
26
 
27
  # ํŒŒ์ผ ์ƒ๋‹จ์—์„œ ํ•œ ๋ฒˆ๋งŒ ๋กœ๋“œ
28
- plobin_logo_base64 = get_image_base64("img/plobin.png")
29
 
30
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
31
  load_dotenv()
@@ -52,13 +52,25 @@ st.set_page_config(
52
  initial_sidebar_state="expanded"
53
  )
54
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Custom CSS
56
  st.markdown("""
57
  <style>
58
  [data-testid="stSidebar"] {
59
  background: linear-gradient(180deg,
60
- #667eea 0%,
61
- #764ba2 100%);
62
  box-shadow: 4px 0 30px rgba(0,0,0,0.2);
63
  width: 290px !important;
64
  }
@@ -340,7 +352,7 @@ st.markdown("""
340
 
341
  /* ์ฑ„ํŒ… ํƒ€์ดํ‹€ ์Šคํƒ€์ผ (์• ๋‹ˆ๋ฉ”์ด์…˜ ์ œ๊ฑฐ) */
342
  .chat-title {
343
- color: white !important;
344
  font-weight: 900 !important;
345
  font-size: 1.75rem !important;
346
  margin-bottom: 1rem !important;
@@ -439,8 +451,8 @@ def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]
439
  pages_text = {}
440
 
441
  # ==================== ์ˆ˜์ •๋œ ์ฒญํฌ ์„ค์ • ====================
442
- CHUNK_SIZE = 300 # 300์—์„œ 800์œผ๋กœ ์ฆ๊ฐ€
443
- OVERLAP_SIZE = 60 # 60์—์„œ 150์œผ๋กœ ์ฆ๊ฐ€
444
  # ========================================================
445
 
446
  for page_num in range(len(doc)):
@@ -539,35 +551,58 @@ def create_vector_db(chunks: List[str], metadata_list: List[Dict]):
539
  return collection, embedder
540
 
541
 
542
- def extract_keywords(text: str, top_n: int = 5) -> List[str]:
543
- """ํ‚ค์›Œ๋“œ ์ถ”์ถœ"""
 
 
 
 
 
 
 
 
 
 
544
  words_with_numbers = re.findall(r'[๊ฐ€-ํžฃ]*\d+[๊ฐ€-ํžฃ]*', text)
545
- words = re.findall(r'[๊ฐ€-ํžฃ]{2,}', text)
546
-
547
- stopwords = {
548
- '๊ฒƒ', '๋“ฑ', '๋ฐ', '๊ทธ', '์ด', '์ €', '์ˆ˜', '๋•Œ', '์ค‘', '๋‚ด', '๋…„', '์›”', '์ผ',
549
- '๊ฒฝ์šฐ', '๋Œ€ํ•œ', 'ํ†ตํ•ด', '์œ„ํ•ด', '๊ด€๋ จ', '์žˆ๋Š”', 'ํ•˜๋Š”', '๋˜๋Š”', '์ด๋Ÿฐ', '์ €๋Ÿฐ',
550
- '์–ด๋–ค', '๋ฌด์Šจ', '์–ด๋А', '๋ˆ„๊ตฌ', '์–ธ์ œ', '์–ด๋””', '๋ฌด์—‡', '์–ด๋–ป๊ฒŒ', '์™œ',
551
- '์•Œ๋ ค', '์„ค๋ช…', '๋งํ•ด', '๋Œ€ํ•ด', '๊ด€ํ•˜์—ฌ', '์žˆ๋‚˜์š”', '์ธ๊ฐ€์š”', '๋ฌด์—‡์ธ๊ฐ€์š”',
552
- '์–ผ๋งˆ', '์ž…๋‹ˆ๊นŒ', 'ํ•ฉ๋‹ˆ๊นŒ'
553
- }
554
 
555
- important_keywords = {
556
- '๊ธˆ์•ก', '๊ฐ€๊ฒฉ', '๋น„์šฉ', '์˜ˆ์‚ฐ', '์„ค๊ณ„', '์‚ฌ์—…', '๊ณผ์—…', '๊ณ„์•ฝ',
557
- '๊ณต์‚ฌ', '์šฉ์—ญ', '์ œ์•ˆ', '์ž…์ฐฐ', '๋‚™์ฐฐ', '๊ฒฌ์ ', '๋‹จ๊ฐ€'
558
- }
559
 
560
- filtered_words = [w for w in words if w not in stopwords and len(w) >= 2]
561
- word_freq = Counter(filtered_words)
562
 
563
- for word in word_freq:
564
- if word in important_keywords:
565
- word_freq[word] += 5
566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  result = []
568
- result.extend([w for w in words_with_numbers if w])
569
 
570
- for word, _ in word_freq.most_common(top_n * 2):
 
 
 
 
 
 
571
  if word not in result:
572
  result.append(word)
573
  if len(result) >= top_n:
@@ -576,9 +611,9 @@ def extract_keywords(text: str, top_n: int = 5) -> List[str]:
576
  return result[:top_n]
577
 
578
 
579
- # ==================== ์ƒˆ๋กœ์šด ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ ํ•จ์ˆ˜ ====================
580
  def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
581
- """ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰: ๋ฒกํ„ฐ ์œ ์‚ฌ๋„ + ํ‚ค์›Œ๋“œ ๋งค์นญ"""
582
  # 1. ๋ฒกํ„ฐ ๊ฒ€์ƒ‰
583
  query_embedding = embedder.encode([query], convert_to_numpy=True)[0]
584
  vector_results = collection.query(
@@ -587,8 +622,8 @@ def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
587
  include=["documents", "metadatas", "distances"]
588
  )
589
 
590
- # 2. ํ‚ค์›Œ๋“œ ์ถ”์ถœ
591
- keywords = extract_keywords(query, top_n=5)
592
 
593
  # 3. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ
594
  hybrid_results = []
@@ -617,7 +652,7 @@ def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
617
  'keyword_score': keyword_score
618
  })
619
 
620
- # 4. ์ ์ˆ˜์ˆœ ์ •๋ ฌ ํ›„ ์ƒ์œ„ 5๊ฐœ
621
  hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)
622
  top_results = hybrid_results[:top_k]
623
 
@@ -746,7 +781,7 @@ def generate_answer(query: str, search_results: Dict, api_key: str) -> str:
746
  2. **์–ธ์–ด ํ˜ผ์šฉ ๋ฐ ๋น„๋ฌธ ๋Œ€์‘**: ์‚ฌ์šฉ์ž์˜ ๋ฌธ์žฅ์€ ํ•œ๊ตญ์–ด์™€ ์˜์–ด๊ฐ€ ์„ž์ด๊ฑฐ๋‚˜ ๋ฌธ๋ฒ• ์˜ค๋ฅ˜๊ฐ€ ์žˆ์„ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ์˜๋„๋ฅผ ์ถ”๋ก ํ•˜์—ฌ ์ •ํ™•ํžˆ ์ดํ•ดํ•˜๋ผ.
747
  3. **๋ชจํ˜ธํ•œ ์งˆ๋ฌธ ์ž๋™ ๋ณด์ •**: ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์ด ๋ถˆ์™„์ „ํ•˜๊ฑฐ๋‚˜ ๋ชจํ˜ธํ•ด๋„ ์งˆ๋ฌธ ์˜๋„๋ฅผ ์ถ”๋ก ํ•˜์—ฌ ์ ์ ˆํ•˜๊ฒŒ ์žฌ๊ตฌ์„ฑํ•˜๋ผ.
748
  **๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์›์น™ (์ ˆ๋Œ€ ์ถ”์ธก ๊ธˆ์ง€):**
749
- 1. ์ œ๊ณต๋œ ๋ฌธ์„œ๋ฅผ **๋งค์šฐ ๊ผผ๊ผผํžˆ** ์ฝ๊ณ  ์ •ํ™•ํ•œ ์ •๋ณด๋ฅผ ์ฐพ์œผ์„ธ์š”
750
  2. **๋ฐ˜๋“œ์‹œ ๋ฌธ์„œ์—์„œ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์•„ ๋‹ต๋ณ€**ํ•˜๊ณ , ๋ฌธ์„œ์— ์—†๋Š” ๋‚ด์šฉ์€ ์ž„์˜๋กœ ์ถ”์ธกํ•˜์ง€ ๋ง๊ณ  **"๋ฌธ์„œ์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"**๋ผ๊ณ  ๋ช…์‹œํ•˜๋ผ
751
  3. **๋ฌธ์„œ์™€ ์ „ํ˜€ ๋ฌด๊ด€ํ•œ ์งˆ๋ฌธ**(์˜ˆ: ์ ์‹ฌ ์ถ”์ฒœ, ๋‚ ์”จ, ์ผ์ƒ ๋Œ€ํ™” ๋“ฑ)์€ **"์ฃ„์†กํ•˜์ง€๋งŒ, ์ œ๊ณต๋œ ๋ฌธ์„œ์—๋Š” ํ•ด๋‹น ์งˆ๋ฌธ๊ณผ ๊ด€๋ จ๋œ ์ •๋ณด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."**๋ผ๊ณ ๋งŒ ๋‹ต๋ณ€ํ•˜๊ณ  ์ถ”๊ฐ€ ์„ค๋ช… ์—†์ด ์ข…๋ฃŒํ•˜๋ผ
752
  4. ๋ฌธ์„œ์— ์ •๋ณด๊ฐ€ ์žˆ๋Š”๋ฐ๋„ "์—†๋‹ค"๊ณ  ํ•˜์ง€ ๋งˆ์„ธ์š”
@@ -938,16 +973,22 @@ def main():
938
 
939
  # Header ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ „์—๋งŒ ๋ณด์ž„
940
  if not st.session_state.processed:
941
- st.markdown(f"""
942
- <div class="plobin-header">
943
- <img src="data:image/png;base64,{plobin_logo_base64}" class="plobin-logo" alt="PLOBIN" style="height: 60px; margin-bottom: 10px;">
944
- <div class="plobin-subtitle">๋ฌธ์„œ ์† ๋‹ต์„ ์ฐพ์•„์ฃผ๋Š” AI ๋น„์„œ</div>
945
- </div>
946
- """, unsafe_allow_html=True)
 
 
 
 
 
 
947
 
948
  # ========== ์‚ฌ์ด๋“œ๋ฐ” ==========
949
  with st.sidebar:
950
- st.image("img/plobin.png", width=120) # ํ”ฝ์…€ ๊ฐ’์œผ๋กœ ์ง์ ‘ ์ง€์ •
951
  # st.title("๐Ÿ”ฎ PLOBIN")
952
 
953
  uploaded_file = st.file_uploader(
@@ -993,28 +1034,11 @@ def main():
993
  except Exception as e:
994
  st.error(f"์˜ค๋ฅ˜: {str(e)}")
995
 
996
- # ==================== ์ˆ˜์ •: ์ฒญํฌ ํ‘œ์‹œ ์ œ๊ฑฐ ====================
997
  # ๋ฌธ์„œ ์ •๋ณด ํ‘œ์‹œ (์ฒญํฌ ์ •๋ณด ์ œ์™ธ)
998
  if st.session_state.processed:
999
  st.markdown("#### ๐Ÿ“Š ๋ฌธ์„œ ์ •๋ณด")
1000
  st.info(f"๐Ÿ“„ **{st.session_state.doc_metadata['filename']}**")
1001
  st.info(f"๐Ÿ“‘ ํŽ˜์ด์ง€: {st.session_state.doc_metadata['pages']}")
1002
- # ์ฒญํฌ ํ‘œ์‹œ ์ œ๊ฑฐ๋จ
1003
- # ============================================================
1004
-
1005
- # st.divider()
1006
-
1007
- # ์ดˆ๊ธฐํ™” ๋ฒ„ํŠผ
1008
- # if st.button("๐Ÿ”„ ์ƒˆ ๋ฌธ์„œ ์—…๋กœ๋“œ", use_container_width=True):
1009
- # st.session_state.processed = False
1010
- # st.session_state.vector_db = None
1011
- # st.session_state.embedder = None
1012
- # st.session_state.chat_history = []
1013
- # st.session_state.current_highlights = []
1014
- # st.session_state.pdf_bytes = None
1015
- # st.session_state.pdf_pages_text = {}
1016
- # st.session_state.zoom_level = 2.0
1017
- # st.rerun()
1018
 
1019
  # ===== ์•„์ง ๋ฌธ์„œ๊ฐ€ ์ฒ˜๋ฆฌ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ
1020
  if not st.session_state.processed:
@@ -1175,7 +1199,7 @@ def main():
1175
 
1176
  with st.spinner("๐Ÿ”ฎ PLOBIN์ด ๊ฒ€์ƒ‰์ค‘์ž…๋‹ˆ๋‹ค..."):
1177
  try:
1178
- # 1. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ (๋ฒกํ„ฐ + ํ‚ค์›Œ๋“œ) - ์ƒ์œ„ 3๊ฐœ
1179
  search_results = hybrid_search(
1180
  query,
1181
  st.session_state.vector_db,
 
20
 
21
  import base64
22
 
23
+ def get_svg_content(svg_path):
24
+ with open(svg_path, "r", encoding="utf-8") as f:
25
+ return f.read()
26
 
27
  # 파일 상단에서 한 번만 로드
28
+ plobin_logo_svg = get_svg_content("img/plobin.svg")
29
 
30
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
31
  load_dotenv()
 
52
  initial_sidebar_state="expanded"
53
  )
54
 
55
+ # 사이드바 기존
56
+ #667eea 0%,
57
+ #764ba2 100%);
58
+
59
+ # 사이드바 1안
60
+ #5ECFFF 0%,
61
+ #B8FF6E 100%);
62
+
63
+ # 사이드바 2안
64
+ #258CFF 0%,
65
+ #0A1E6A 100%);
66
+
67
  # Custom CSS
68
  st.markdown("""
69
  <style>
70
  [data-testid="stSidebar"] {
71
  background: linear-gradient(180deg,
72
+ #90B9E8 0%,
73
+ #B3BEC9 100%);
74
  box-shadow: 4px 0 30px rgba(0,0,0,0.2);
75
  width: 290px !important;
76
  }
 
352
 
353
  /* 채팅 타이틀 스타일 (애니메이션 제거) */
354
  .chat-title {
355
+ color: black !important;
356
  font-weight: 900 !important;
357
  font-size: 1.75rem !important;
358
  margin-bottom: 1rem !important;
 
451
  pages_text = {}
452
 
453
  # ==================== 수정된 청크 설정 ====================
454
+ CHUNK_SIZE = 800 # 300에서 800으로 증가
455
+ OVERLAP_SIZE = 150 # 60에서 150으로 증가
456
  # ========================================================
457
 
458
  for page_num in range(len(doc)):
 
551
  return collection, embedder
552
 
553
 
554
+ # ==================== 의미 기반 키워드 추출 (하드코딩 제거) ====================
555
+ def extract_keywords_semantic(text: str, embedder, top_n: int = 5) -> List[str]:
556
+ """
557
+ 의미 기반 키워드 추출 - 불용어 리스트 불필요
558
+
559
+ 원리:
560
+ 1. 숫자 포함 단어는 무조건 중요하게 취급
561
+ 2. 원본 텍스트의 의미와 각 단어의 의미 유사도 계산
562
+ 3. 유사도 × 빈도수로 점수 산정
563
+ 4. 점수순으로 상위 키워드 추출
564
+ """
565
+ # 1. 숫자 포함 단어는 무조건 포함 (금액, 날짜, 수량 등)
566
  words_with_numbers = re.findall(r'[๊ฐ€-ํžฃ]*\d+[๊ฐ€-ํžฃ]*', text)
 
 
 
 
 
 
 
 
 
567
 
568
+ # 2. 명사구 추출 (2글자 이상)
569
+ candidate_words = re.findall(r'[๊ฐ€-ํžฃ]{2,}', text)
 
 
570
 
571
+ if not candidate_words:
572
+ return words_with_numbers[:top_n]
573
 
574
+ word_freq = Counter(candidate_words)
 
 
575
 
576
+ # 3. 원본 텍스트와 각 단어의 의미 유사도 계산
577
+ text_embedding = embedder.encode([text], convert_to_numpy=True)[0]
578
+ word_embeddings = embedder.encode(list(word_freq.keys()), convert_to_numpy=True)
579
+
580
+ # 코사인 유사도 계산
581
+ similarities = util.cos_sim(text_embedding, word_embeddings)[0].numpy()
582
+
583
+ # 4. 점수 = 의미유사도 × 빈도수 (빈도는 로그 스케일)
584
+ scored_words = []
585
+ for idx, (word, freq) in enumerate(word_freq.items()):
586
+ # 의미 유사도 70% + 빈도 30%
587
+ semantic_score = similarities[idx]
588
+ frequency_score = np.log1p(freq) / 10.0 # 빈도에 로그 적용 후 정규화
589
+
590
+ combined_score = 0.7 * semantic_score + 0.3 * frequency_score
591
+ scored_words.append((word, combined_score))
592
+
593
+ # 5. 점수순 정렬
594
+ scored_words.sort(key=lambda x: x[1], reverse=True)
595
+
596
+ # 6. 결과 조합: 숫자 포함 단어 우선 + 의미 점수 높은 단어
597
  result = []
 
598
 
599
+ # 숫자 포함 단어 먼저 추가 (최대 3개)
600
+ for word in words_with_numbers[:3]:
601
+ if word and word not in result:
602
+ result.append(word)
603
+
604
+ # ๋‚˜๋จธ์ง€๋ฅผ ์˜๋ฏธ ์ ์ˆ˜๋กœ ์ฑ„์›€
605
+ for word, score in scored_words:
606
  if word not in result:
607
  result.append(word)
608
  if len(result) >= top_n:
 
611
  return result[:top_n]
612
 
613
 
614
+ # ==================== 하이브리드 검색 함수 (의미 기반 키워드 사용) ====================
615
  def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
616
+ """하이브리드 검색: 벡터 유사도 + 의미 기반 키워드 매칭"""
617
  # 1. 벡터 검색
618
  query_embedding = embedder.encode([query], convert_to_numpy=True)[0]
619
  vector_results = collection.query(
 
622
  include=["documents", "metadatas", "distances"]
623
  )
624
 
625
+ # 2. 의미 기반 키워드 추출 (하드코딩 제거)
626
+ keywords = extract_keywords_semantic(query, embedder, top_n=5)
627
 
628
  # 3. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ์ ์ˆ˜ ๊ณ„์‚ฐ
629
  hybrid_results = []
 
652
  'keyword_score': keyword_score
653
  })
654
 
655
+ # 4. 점수순 정렬 후 상위 k개
656
  hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)
657
  top_results = hybrid_results[:top_k]
658
 
 
781
  2. **์–ธ์–ด ํ˜ผ์šฉ ๋ฐ ๋น„๋ฌธ ๋Œ€์‘**: ์‚ฌ์šฉ์ž์˜ ๋ฌธ์žฅ์€ ํ•œ๊ตญ์–ด์™€ ์˜์–ด๊ฐ€ ์„ž์ด๊ฑฐ๋‚˜ ๋ฌธ๋ฒ• ์˜ค๋ฅ˜๊ฐ€ ์žˆ์„ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ์˜๋„๋ฅผ ์ถ”๋ก ํ•˜์—ฌ ์ •ํ™•ํžˆ ์ดํ•ดํ•˜๋ผ.
782
  3. **๋ชจํ˜ธํ•œ ์งˆ๋ฌธ ์ž๋™ ๋ณด์ •**: ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์ด ๋ถˆ์™„์ „ํ•˜๊ฑฐ๋‚˜ ๋ชจํ˜ธํ•ด๋„ ์งˆ๋ฌธ ์˜๋„๋ฅผ ์ถ”๋ก ํ•˜์—ฌ ์ ์ ˆํ•˜๊ฒŒ ์žฌ๊ตฌ์„ฑํ•˜๋ผ.
783
  **๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์›์น™ (์ ˆ๋Œ€ ์ถ”์ธก ๊ธˆ์ง€):**
784
+ 1. 제공된 문서를 **매우 꼼꼼히** 읽고 정확한 정보를 찾으세요
785
  2. **๋ฐ˜๋“œ์‹œ ๋ฌธ์„œ์—์„œ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์•„ ๋‹ต๋ณ€**ํ•˜๊ณ , ๋ฌธ์„œ์— ์—†๋Š” ๋‚ด์šฉ์€ ์ž„์˜๋กœ ์ถ”์ธกํ•˜์ง€ ๋ง๊ณ  **"๋ฌธ์„œ์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"**๋ผ๊ณ  ๋ช…์‹œํ•˜๋ผ
786
  3. **๋ฌธ์„œ์™€ ์ „ํ˜€ ๋ฌด๊ด€ํ•œ ์งˆ๋ฌธ**(์˜ˆ: ์ ์‹ฌ ์ถ”์ฒœ, ๋‚ ์”จ, ์ผ์ƒ ๋Œ€ํ™” ๋“ฑ)์€ **"์ฃ„์†กํ•˜์ง€๋งŒ, ์ œ๊ณต๋œ ๋ฌธ์„œ์—๋Š” ํ•ด๋‹น ์งˆ๋ฌธ๊ณผ ๊ด€๋ จ๋œ ์ •๋ณด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."**๋ผ๊ณ ๋งŒ ๋‹ต๋ณ€ํ•˜๊ณ  ์ถ”๊ฐ€ ์„ค๋ช… ์—†์ด ์ข…๋ฃŒํ•˜๋ผ
787
  4. ๋ฌธ์„œ์— ์ •๋ณด๊ฐ€ ์žˆ๋Š”๋ฐ๋„ "์—†๋‹ค"๊ณ  ํ•˜์ง€ ๋งˆ์„ธ์š”
 
973
 
974
  # Header 문서 처리 전에만 보임
975
  if not st.session_state.processed:
976
+ # 로고 가운데 정렬
977
+ col1, col2, col3 = st.columns([1, 1, 1])
978
+ with col2:
979
+ st.image("img/plobin.svg", use_container_width=True)
980
+ st.text(' ')
981
+
982
+ # 서브타이틀
983
+ # st.markdown("""
984
+ # <div style="text-align: center; margin-top: 10px;">
985
+ # ๋ฌธ์„œ ์† ๋‹ต์„ ์ฐพ์•„์ฃผ๋Š” AI ๋น„์„œ
986
+ # </div>
987
+ # """, unsafe_allow_html=True)
988
 
989
  # ========== 사이드바 ==========
990
  with st.sidebar:
991
+ st.image("img/plobin-left-only.png", width=30) # 픽셀 값으로 직접 지정
992
  # st.title("๐Ÿ”ฎ PLOBIN")
993
 
994
  uploaded_file = st.file_uploader(
 
1034
  except Exception as e:
1035
  st.error(f"์˜ค๋ฅ˜: {str(e)}")
1036
 
 
1037
  # ๋ฌธ์„œ ์ •๋ณด ํ‘œ์‹œ (์ฒญํฌ ์ •๋ณด ์ œ์™ธ)
1038
  if st.session_state.processed:
1039
  st.markdown("#### ๐Ÿ“Š ๋ฌธ์„œ ์ •๋ณด")
1040
  st.info(f"๐Ÿ“„ **{st.session_state.doc_metadata['filename']}**")
1041
  st.info(f"๐Ÿ“‘ ํŽ˜์ด์ง€: {st.session_state.doc_metadata['pages']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1042
 
1043
  # ===== ์•„์ง ๋ฌธ์„œ๊ฐ€ ์ฒ˜๋ฆฌ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ
1044
  if not st.session_state.processed:
 
1199
 
1200
  with st.spinner("๐Ÿ”ฎ PLOBIN์ด ๊ฒ€์ƒ‰์ค‘์ž…๋‹ˆ๋‹ค..."):
1201
  try:
1202
+ # 1. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ (๋ฒกํ„ฐ + ์˜๋ฏธ ๊ธฐ๋ฐ˜ ํ‚ค์›Œ๋“œ) - ์ƒ์œ„ 3๊ฐœ
1203
  search_results = hybrid_search(
1204
  query,
1205
  st.session_state.vector_db,