Spaces:
Sleeping
Sleeping
File size: 5,454 Bytes
a2ca191 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
#!/usr/bin/env python3
"""
Test Enhanced Search for Pension Rules Query
Demonstrates improved search results for "What are the pension rules?" with 1500+ documents
"""
import asyncio
import logging
import sys
import os
# Module-level logger plus root logging configuration: INFO and above,
# rendered as "LEVEL: message".
logger = logging.getLogger(__name__)
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)
def _content_preview(content, width=150):
    """Return the first ``width`` characters of ``content`` on a single line.

    Newlines are replaced by spaces, surrounding whitespace is stripped, and
    '...' is appended when ``content`` is longer than ``width``.
    """
    snippet = content[:width].replace('\n', ' ').strip()
    return f"{snippet}{'...' if len(content) > width else ''}"


def _print_enhanced_results(results, shown=3):
    """Print up to ``shown`` enhanced-search hits, preferring pension-related lines."""
    print(f"✅ Enhanced search found {len(results)} relevant documents:")
    for i, result in enumerate(results[:shown], 1):
        # A missing *or* None field is treated as empty so one sparse record
        # cannot abort the whole report.
        content = result.get('content') or ''
        filename = result.get('filename') or 'Unknown'
        pension_lines = [
            line.strip()
            for line in content.split('\n')
            if 'pension' in line.lower()
        ]
        print(f"\n{i}. Document: {filename}")
        if pension_lines:
            print(" Pension content preview:")
            # Only the first two pension-related lines, each capped at 80 chars.
            for line in pension_lines[:2]:
                print(f" • {line[:80]}{'...' if len(line) > 80 else ''}")
        else:
            # No line mentions "pension": fall back to a generic preview.
            print(f" Content preview: {_content_preview(content)}")


def _print_original_results(results, shown=3):
    """Print up to ``shown`` original-search hits and flag whether each mentions pensions."""
    print(f"📄 Original search found {len(results)} documents:")
    for i, result in enumerate(results[:shown], 1):
        content = result.get('content') or ''
        filename = result.get('filename') or 'Unknown'
        print(f"\n{i}. Document: {filename}")
        print(f" Content preview: {_content_preview(content)}")
        if 'pension' in content.lower():
            print(" ✅ Contains pension content")
        else:
            print(" ❌ No pension content detected")


async def test_pension_search():
    """Compare the enhanced and the original search for one pension query.

    Runs ``enhanced_search_service.enhanced_pension_search`` and then
    ``lancedb_service.search_documents`` with the same query and ``limit=5``,
    printing the top three hits of each. Nothing is returned; import failures
    and any other errors are reported on stdout instead of being raised.
    """
    # NOTE(review): the status icons in this block were restored from
    # mis-decoded text. The check/cross/warning/bullet/tip glyphs are certain;
    # the section-header icons are a best guess - confirm against the repo.
    print("🔍 Testing Enhanced Search for Large Document Collection (1500+ docs)")
    print("=" * 70)

    # Test query that was giving wrong results
    test_query = "What are the pension rules?"

    try:
        # Imported lazily so that a missing module is reported by the
        # ImportError handler below instead of failing at module import time.
        from enhanced_search_service import enhanced_search_service
        from lancedb_service import lancedb_service

        print(f"📝 Query: '{test_query}'")
        print("📊 Document collection size: ~1500 documents")
        print()

        # Test enhanced search
        print("🚀 Testing Enhanced Search Strategy:")
        print("-" * 40)
        enhanced_results = await enhanced_search_service.enhanced_pension_search(
            test_query, limit=5
        )
        if enhanced_results:
            _print_enhanced_results(enhanced_results)
        else:
            print("❌ Enhanced search found no results")

        print("\n" + "=" * 70)

        # Test fallback to original search
        print("⚠️ Original Search Strategy (for comparison):")
        print("-" * 40)
        try:
            original_results = await lancedb_service.search_documents(test_query, limit=5)
            if original_results:
                _print_original_results(original_results)
            else:
                print("❌ Original search found no results")
        except Exception as e:
            # A failing baseline search must not prevent the summary below.
            print(f"❌ Original search failed: {e}")

        print("\n" + "=" * 70)
        print("📊 Search Comparison Summary:")
        print(" Enhanced Search: Better targeting of pension-specific content")
        print(" Original Search: Generic results that might miss relevant docs")
        print(" Expected Result: Enhanced search should return actual pension rules")
    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("💡 Make sure you're running from the PensionBot directory")
    except Exception as e:
        # Top-level boundary of this manual test: report and return.
        print(f"❌ Test error: {e}")
async def test_query_enhancement():
    """Print each sample query next to its enhanced form.

    Feeds a fixed list of pension-related queries through
    ``enhanced_search_service._enhance_query`` and prints the original and
    enhanced text. Nothing is returned; errors are reported on stdout
    instead of being raised.
    """
    # NOTE(review): status icons restored from mis-decoded text - confirm
    # against the repository copy.
    print("\n🎯 Testing Query Enhancement Strategies:")
    print("=" * 50)

    test_queries = [
        "What are the pension rules?",
        "How to calculate pension?",
        "Pension eligibility criteria",
        "Family pension benefits",
        "Commutation of pension",
    ]

    try:
        # Imported lazily so a missing module is reported below.
        from enhanced_search_service import enhanced_search_service

        for query in test_queries:
            # NOTE(review): reaches into a private helper; presumably
            # synchronous since the result is printed without awaiting - confirm.
            enhanced_query = enhanced_search_service._enhance_query(query)
            print(f"Original: {query}")
            print(f"Enhanced: {enhanced_query}")
            print()
    except ImportError as e:
        # Same actionable hint as the search comparison test gives.
        print(f"❌ Import error: {e}")
        print("💡 Make sure you're running from the PensionBot directory")
    except Exception as e:
        print(f"❌ Query enhancement test error: {e}")
if __name__ == "__main__":
    # Script entry point: print a banner, then run both manual tests in order.
    print("🎯 Enhanced Search Test for Large Document Collections")
    print("Testing improved search for pension rules with 1500+ documents")
    print()

    # Run the tests; each asyncio.run call drives one coroutine to completion
    # on its own event loop.
    asyncio.run(test_pension_search())
    asyncio.run(test_query_enhancement())