From 23291cac9d93854baf077998557281471bd862b3 Mon Sep 17 00:00:00 2001 From: Karan Kumar Bhagat <92136711+karandomguy@users.noreply.github.com> Date: Thu, 4 Sep 2025 11:17:33 +0530 Subject: [PATCH] Update document_processing.py --- document_processing.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/document_processing.py b/document_processing.py index 3d89845..366f8fa 100644 --- a/document_processing.py +++ b/document_processing.py @@ -5,11 +5,6 @@ import re import pandas as pd from sentence_transformers import SentenceTransformer -import chromadb -from dotenv import load_dotenv -import requests -from bs4 import BeautifulSoup -import time load_dotenv() @@ -65,8 +60,6 @@ def load_documents(self, filename: str = "medical_documents.json") -> List[Dict[ def save_documents(self, documents: List[Dict[str, Any]], filename: str = "medical_documents.json"): """Save documents to JSON file""" - os.makedirs(self.data_dir, exist_ok=True) - filepath = os.path.join(self.data_dir, filename) with open(filepath, 'w') as f: json.dump(documents, f, indent=2) print(f"Saved {len(documents)} documents to {filepath}") @@ -244,4 +237,4 @@ def run_pipeline(self, json_file=None, urls=None): medical_urls = [] count = processor.run_pipeline(urls=medical_urls) - print(f"Successfully processed documents into {count} chunks in the vector database") \ No newline at end of file + print(f"Successfully processed documents into {count} chunks in the vector database")