ChAbhishek28 committed on
Commit
a65937e
·
1 Parent(s): d474d22

Fix document setup to use correct LanceDB API

Browse files

- Fix add_document method call to use add_documents with proper document objects
- Create document objects with page_content and metadata attributes
- Add support for creating rajasthan_documents table if it doesn't exist
- Properly handle document insertion with embeddings and IDs

Files changed (1) hide show
  1. setup_documents.py +55 -18
setup_documents.py CHANGED
@@ -84,24 +84,61 @@ async def setup_sample_documents():
84
  try:
85
  logger.info("🗂️ Setting up sample government documents...")
86
 
87
- for doc in SAMPLE_DOCUMENTS:
88
- await lancedb_service.add_document(
89
- user_id="system",
90
- knowledge_base="government_docs",
91
- filename=doc["filename"],
92
- content=doc["content"],
93
- metadata={"source": doc["source"]}
94
- )
95
-
96
- # Also add to rajasthan_documents for specific queries
97
- for doc in SAMPLE_DOCUMENTS:
98
- await lancedb_service.add_document(
99
- user_id="system",
100
- knowledge_base="rajasthan_documents",
101
- filename=doc["filename"],
102
- content=doc["content"],
103
- metadata={"source": doc["source"]}
104
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  logger.info("✅ Sample documents added successfully")
107
 
 
84
  try:
85
  logger.info("🗂️ Setting up sample government documents...")
86
 
87
+ # Create document objects that match LanceDB expectations
88
+ docs = []
89
+ for doc_data in SAMPLE_DOCUMENTS:
90
+ # Create a simple document object with the expected attributes
91
+ doc = type('Document', (), {
92
+ 'page_content': doc_data["content"],
93
+ 'metadata': {"source": doc_data["source"], "filename": doc_data["filename"]}
94
+ })()
95
+ docs.append(doc)
96
+
97
+ # Add to government_docs knowledge base
98
+ await lancedb_service.add_documents(
99
+ docs=docs,
100
+ user_id="system",
101
+ knowledge_base="government_docs",
102
+ filename="sample_documents.txt"
103
+ )
104
+
105
+ # Also create a specific rajasthan_documents table entry
106
+ # Check if rajasthan_documents table exists, create if not
107
+ try:
108
+ if "rajasthan_documents" not in lancedb_service.db.table_names():
109
+ # Create the table using the same structure as documents table
110
+ import pandas as pd
111
+ import uuid
112
+ from datetime import datetime
113
+
114
+ sample_data = pd.DataFrame({
115
+ "id": [str(uuid.uuid4())],
116
+ "content": ["sample"],
117
+ "filename": ["sample"],
118
+ "vector": [lancedb_service.embedding_model.embed_query("sample")]
119
+ })
120
+ lancedb_service.db.create_table("rajasthan_documents", sample_data)
121
+ # Delete sample data
122
+ tbl = lancedb_service.db.open_table("rajasthan_documents")
123
+ tbl.delete("id = 'sample'")
124
+
125
+ # Now add the real documents
126
+ rajasthan_docs = []
127
+ for doc_data in SAMPLE_DOCUMENTS:
128
+ embedding = lancedb_service.embedding_model.embed_query(doc_data["content"])
129
+ raj_doc = {
130
+ "id": str(uuid.uuid4()),
131
+ "content": doc_data["content"],
132
+ "filename": doc_data["filename"],
133
+ "vector": embedding
134
+ }
135
+ rajasthan_docs.append(raj_doc)
136
+
137
+ df = pd.DataFrame(rajasthan_docs)
138
+ tbl.add(df)
139
+
140
+ except Exception as e:
141
+ logger.warning(f"⚠️ Could not setup rajasthan_documents table: {e}")
142
 
143
  logger.info("✅ Sample documents added successfully")
144