|
|
"""
Upload documents tab functionality for the Gradio app.
"""
|
|
import gradio as gr |
|
|
|
|
|
def _uploaded_path(entry):
    """Return the filesystem path for one uploaded entry.

    Plain strings are returned unchanged, path-like objects are converted via
    ``__fspath__``, and any other object is represented by its ``name``
    attribute when it has one (otherwise the object itself is returned).
    """
    if isinstance(entry, str):
        return entry
    if hasattr(entry, "__fspath__"):
        # Checked before ``.name``: on pathlib paths ``.name`` is only the
        # final component, not the full path.
        return entry.__fspath__()
    return getattr(entry, "name", entry)


def upload_documents(files, global_vars):
    """Handle document upload and processing.

    Keeps only the PDF uploads, passes them to the ingestion object stored
    under ``global_vars['doc_ingestion']``, builds a vector store from the
    returned documents and stores it under ``global_vars['vectorstore']``.

    Args:
        files: The uploaded files. Usually a list of path strings; a single
            path string, path-like objects and objects exposing their path
            as ``.name`` are accepted as well.
        global_vars: Mutable dict of shared application state.
            ``'doc_ingestion'`` is read; ``'vectorstore'`` is written when
            processing succeeds.

    Returns:
        A status string: a per-document summary on success, otherwise a
        message describing what went wrong. Exceptions raised while
        processing are caught, printed to the console and reported in the
        returned string rather than propagated.
    """
    doc_ingestion = global_vars.get('doc_ingestion')
    if not doc_ingestion:
        return "❌ Please initialize systems first using the 'Initialize System' tab!"

    if not files:
        return "❌ Please upload at least one PDF file."

    try:
        # A lone path string would otherwise be iterated character by character.
        if isinstance(files, str):
            files = [files]

        # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
        pdf_files = [
            path
            for path in map(_uploaded_path, files)
            if str(path).lower().endswith('.pdf')
        ]
        if not pdf_files:
            return "❌ Please upload PDF files only."

        # NOTE(review): the 'π' prefix on the console messages and on the
        # "Ready for queries" line looks like a mis-decoded emoji whose
        # original code point cannot be recovered here - confirm the glyph.
        print(f"π Processing {len(pdf_files)} PDF file(s)...")
        documents = doc_ingestion.process_documents(pdf_files)
        if not documents:
            return "❌ No documents were successfully processed. Please check if your PDFs are readable."

        print("π Creating vector store...")
        vectorstore = doc_ingestion.create_vector_store(documents)
        if not vectorstore:
            return "❌ Failed to create vector store from documents."

        global_vars['vectorstore'] = vectorstore

        parts = [f"✅ Successfully processed {len(documents)} document(s):\n\n"]
        for i, doc in enumerate(documents, 1):
            metadata = doc.metadata
            # Every field falls back to 'Unknown' so one document with
            # incomplete metadata cannot abort the whole summary.
            parts.append(
                f"{i}. **{metadata.get('source', 'Unknown')}**\n"
                f" - University: {metadata.get('university', 'Unknown')}\n"
                f" - Country: {metadata.get('country', 'Unknown')}\n"
                f" - Type: {metadata.get('document_type', 'Unknown')}\n"
                f" - Language: {metadata.get('language', 'Unknown')}\n\n"
            )
        parts.append(
            "π **Ready for queries!** Go to the 'Search & Query' tab to start asking questions."
        )
        return "".join(parts)

    except Exception as e:
        # The returned message sends the user to the console, so make sure
        # the full traceback is actually printed there.
        import traceback

        traceback.print_exc()
        return f"❌ Error processing documents: {str(e)}\n\nPlease check the console for more details."
|
|
|
|
|
def create_upload_tab(global_vars):
    """Build the "Upload Documents" tab and wire up its controls.

    The tab holds an introductory Markdown blurb, a multi-file PDF picker,
    a "Process Documents" button and a read-only status box. Clicking the
    button passes the selected files to ``upload_documents`` together with
    ``global_vars`` and shows the returned status text in the status box.

    Args:
        global_vars: Shared application state dict, forwarded unchanged to
            ``upload_documents`` on every click.
    """

    def _process_uploads(uploaded):
        # Bind the shared state here so the click event only supplies files.
        return upload_documents(uploaded, global_vars)

    intro_md = (
        "\n"
        "### Step 2: Upload PDF Documents\n"
        "Upload university documents (brochures, admission guides, etc.) in PDF format.\n"
        "The system will automatically extract metadata including university name, country, and document type.\n"
    )

    # NOTE(review): the 'π' prefixes on the labels below look like
    # mis-decoded emoji - confirm the intended glyphs.
    with gr.Tab("π Upload Documents", id="upload"):
        gr.Markdown(intro_md)

        file_upload = gr.File(
            label="π Upload PDF Documents",
            file_types=[".pdf"],
            file_count="multiple",
            height=120,
        )
        upload_btn = gr.Button("π Process Documents", variant="primary", size="lg")
        upload_status = gr.Textbox(
            label="Processing Status",
            interactive=False,
            lines=12,
            placeholder="Upload PDF files and click 'Process Documents'...",
        )

        upload_btn.click(
            _process_uploads,
            inputs=file_upload,
            outputs=upload_status,
        )
|
|
|