Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
Test Enhanced Search for Pension Rules Query
Demonstrates improved search results for "What are the pension rules?" with 1500+ documents
"""
import asyncio
import logging
import os
import sys
# Setup logging: INFO and above go to the root handler as "LEVEL: message".
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _content_preview(content, limit=150):
    """Return the first *limit* characters of *content* on one line, with '...' appended when truncated."""
    preview = content[:limit].replace('\n', ' ').strip()
    return f"{preview}{'...' if len(content) > limit else ''}"


def _pension_lines(content, max_lines=2):
    """Return up to *max_lines* stripped lines of *content* that mention 'pension' (case-insensitive)."""
    matches = [line.strip() for line in content.split('\n') if 'pension' in line.lower()]
    return matches[:max_lines]


async def test_pension_search():
    """Print a side-by-side comparison of enhanced and original search results.

    Runs the fixed query "What are the pension rules?" through
    ``enhanced_search_service.enhanced_pension_search`` and then through
    ``lancedb_service.search_documents`` (both awaited with ``limit=5``) and
    prints a preview of at most the first three results of each.

    Results are read with ``.get('content')`` / ``.get('filename')``, so they
    are assumed to be dict-like -- TODO confirm against the services.

    Returns:
        None. All output goes to stdout. Import failures and any other
        exception are reported with ``print`` and not re-raised.
    """
    # NOTE(review): the leading symbols in the messages below look like
    # mis-decoded emoji; they are kept verbatim because the original glyphs
    # cannot be recovered from this file.
    print("π Testing Enhanced Search for Large Document Collection (1500+ docs)")
    print("=" * 70)

    # Test query that was giving wrong results
    test_query = "What are the pension rules?"

    try:
        # Imported inside the function so a missing project module is reported
        # by the ImportError handler below rather than failing at module import.
        from enhanced_search_service import enhanced_search_service
        from lancedb_service import lancedb_service

        print(f"π Query: '{test_query}'")
        print("π Document collection size: ~1500 documents")
        print()

        # Test enhanced search
        print("π Testing Enhanced Search Strategy:")
        print("-" * 40)
        enhanced_results = await enhanced_search_service.enhanced_pension_search(test_query, limit=5)

        if enhanced_results:
            print(f"β Enhanced search found {len(enhanced_results)} relevant documents:")
            for i, result in enumerate(enhanced_results[:3], 1):
                # `or` also covers a key that is present but None, which would
                # otherwise crash on .split()/.lower() below.
                content = result.get('content') or ''
                filename = result.get('filename') or 'Unknown'

                print(f"\n{i}. Document: {filename}")
                # Prefer lines that actually mention pensions; otherwise fall
                # back to a generic preview of the document start.
                pension_lines = _pension_lines(content)
                if pension_lines:
                    print(" Pension content preview:")
                    for line in pension_lines:
                        print(f" β’ {line[:80]}{'...' if len(line) > 80 else ''}")
                else:
                    print(f" Content preview: {_content_preview(content)}")
        else:
            print("β Enhanced search found no results")

        print("\n" + "=" * 70)

        # Test fallback to original search
        print("β οΈ Original Search Strategy (for comparison):")
        print("-" * 40)
        try:
            original_results = await lancedb_service.search_documents(test_query, limit=5)

            if original_results:
                print(f"π Original search found {len(original_results)} documents:")
                for i, result in enumerate(original_results[:3], 1):
                    content = result.get('content') or ''
                    filename = result.get('filename') or 'Unknown'

                    print(f"\n{i}. Document: {filename}")
                    print(f" Content preview: {_content_preview(content)}")

                    # Check if it's actually pension-related
                    if 'pension' in content.lower():
                        print(" β Contains pension content")
                    else:
                        print(" β No pension content detected")
            else:
                print("β Original search found no results")
        except Exception as e:
            # A failure of the baseline search must not hide the enhanced
            # results already printed, so report it and continue to the summary.
            print(f"β Original search failed: {e}")

        print("\n" + "=" * 70)
        print("π Search Comparison Summary:")
        print(" Enhanced Search: Better targeting of pension-specific content")
        print(" Original Search: Generic results that might miss relevant docs")
        print(" Expected Result: Enhanced search should return actual pension rules")
    except ImportError as e:
        print(f"β Import error: {e}")
        print("π‘ Make sure you're running from the PensionBot directory")
    except Exception as e:
        # Top-level boundary of a manual test script: report and return.
        print(f"β Test error: {e}")
async def test_query_enhancement():
    """Print each sample query next to its enhanced form.

    For every query in a fixed list, calls
    ``enhanced_search_service._enhance_query(query)`` and prints the original
    and the returned value. Note that this reaches into a private
    (underscore-prefixed) method of the service.

    Returns:
        None. All output goes to stdout. Any exception, including a failed
        import of ``enhanced_search_service``, is reported with ``print`` and
        not re-raised.
    """
    # NOTE(review): the leading symbols in the messages look like mis-decoded
    # emoji; they are kept verbatim because the originals cannot be recovered.
    print("\nπ― Testing Query Enhancement Strategies:")
    print("=" * 50)

    test_queries = [
        "What are the pension rules?",
        "How to calculate pension?",
        "Pension eligibility criteria",
        "Family pension benefits",
        "Commutation of pension",
    ]

    try:
        # Imported here so a missing module is caught by the handler below.
        from enhanced_search_service import enhanced_search_service

        for query in test_queries:
            enhanced_query = enhanced_search_service._enhance_query(query)
            print(f"Original: {query}")
            print(f"Enhanced: {enhanced_query}")
            print()
    except Exception as e:
        # Top-level boundary of a manual test script: report and return.
        print(f"β Query enhancement test error: {e}")
if __name__ == "__main__":
    print("π― Enhanced Search Test for Large Document Collections")
    print("Testing improved search for pension rules with 1500+ documents")
    print()

    async def _run_all_tests():
        """Run both checks in order on a single event loop."""
        await test_pension_search()
        await test_query_enhancement()

    # Run the tests. One asyncio.run call keeps both coroutines on the same
    # event loop; separate calls would each create and close their own loop.
    # NOTE(review): this matters only if the imported services keep
    # loop-bound state between calls -- not verifiable from this file.
    asyncio.run(_run_all_tests())