Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Download LanceDB data from Hugging Face LFS storage at startup. | |
| This script downloads the Git LFS files that contain the 11,555 document database. | |
| """ | |
| import os | |
| import subprocess | |
| from pathlib import Path | |
def _verify_lancedb_download(lancedb_path):
    """Print what is on disk under *lancedb_path* and report whether it is usable.

    Returns:
        bool: True only when ``rajasthan_documents.lance/_versions`` exists
        inside *lancedb_path*; False otherwise.
    """
    if not lancedb_path.exists():
        print(f"⚠️ lancedb_data NOT found at: {lancedb_path.absolute()}")
        print(f"📂 Current directory: {Path.cwd()}")
        print(f"📂 Contents: {list(Path.cwd().iterdir())[:10]}")
        return False

    print(f"📁 lancedb_data exists at: {lancedb_path.absolute()}")
    rajasthan_docs = lancedb_path / "rajasthan_documents.lance"
    if not rajasthan_docs.exists():
        return False

    version_file = rajasthan_docs / "_versions"
    print(f"📁 rajasthan_documents.lance exists: {rajasthan_docs.absolute()}")
    print(f"📁 _versions exists: {version_file.exists()}")
    if not version_file.exists():
        return False

    version_files = list(version_file.iterdir())
    print(f"📄 Found {len(version_files)} version files")
    return True


def download_lfs_files():
    """Make sure the LanceDB data is available locally, downloading it if needed.

    The data is considered present when
    ``lancedb_data/rajasthan_documents.lance/_versions`` exists relative to the
    current working directory. If it does not, the ``lancedb_data/**`` files of
    the ``ChAbhishek28/PensionBot`` Space are fetched into the current
    directory with ``huggingface_hub.snapshot_download``.

    Returns:
        bool: True when the ``_versions`` directory exists, either beforehand
        or after the download. False when the download raised any exception
        (including ``huggingface_hub`` not being importable) or completed
        without producing that directory.
    """
    print("💽 Checking for LanceDB data...")
    lancedb_path = Path("lancedb_data")
    rajasthan_docs = lancedb_path / "rajasthan_documents.lance"

    # The table directory can exist without "_versions"; the file's own
    # message treats that state as an un-materialised LFS pointer.
    if rajasthan_docs.exists():
        if (rajasthan_docs / "_versions").exists():
            print("✅ LanceDB data already downloaded")
            return True
        print("⚠️ LanceDB data is LFS pointer - downloading actual files...")

    try:
        # Imported lazily so a missing huggingface_hub package is reported as
        # a failed download instead of breaking module import.
        from huggingface_hub import snapshot_download

        space_id = "ChAbhishek28/PensionBot"
        print(f"📥 Downloading LFS files from {space_id}...")

        # Download only the lancedb_data folder.
        # NOTE(review): local_dir_use_symlinks looks deprecated in newer
        # huggingface_hub releases - confirm against the pinned version.
        snapshot_download(
            repo_id=space_id,
            repo_type="space",
            allow_patterns="lancedb_data/**",
            local_dir=".",
            local_dir_use_symlinks=False,
        )
        print("✅ LFS files downloaded successfully")

        # Report the real on-disk state rather than assuming the download
        # produced a usable database.
        if _verify_lancedb_download(lancedb_path):
            return True
        print("⚠️ Voice Bot will start with minimal documents")
        return False
    except Exception as e:
        # Best-effort startup step: never raise, just report the failure.
        print(f"❌ Failed to download LFS files: {e}")
        print("⚠️ Voice Bot will start with minimal documents")
        return False
# Script entry point: run the download check when this file is executed
# directly. The returned status is not used here.
if __name__ == "__main__":
    _ = download_lfs_files()