diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index c43ddcf..68fb595 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,5 +1,5 @@ import { useState, useEffect, useRef } from 'react' -import { FileText, Upload, Loader2, Download, CheckCircle2, XCircle, FileDown, Github, Clock, Repeat2 } from 'lucide-react' +import { FileText, Upload, Loader2, Download, CheckCircle2, XCircle, FileDown, Github, Clock, Repeat2, Trash2 } from 'lucide-react' import { Button } from './components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from './components/ui/card' import { Select } from './components/ui/select' @@ -23,17 +23,18 @@ function App() { const [toastMessage, setToastMessage] = useState(null) const [timeTaken, setTimeTaken] = useState(null) const [startTime, setStartTime] = useState(null) - + // Comparison states const [showCompareModal, setShowCompareModal] = useState(false) const [recentTasks, setRecentTasks] = useState([]) const [currentComparisonTask, setCurrentComparisonTask] = useState(null) const [comparisonComparisons, setComparisonComparisons] = useState([]) const [isComparingLoading, setIsComparingLoading] = useState(false) - + const toastTimerRef = useRef(null) const timerIntervalRef = useRef(null) const comparisonProgressInterval = useRef(null) + const [togglingLibrary, setTogglingLibrary] = useState(null) useEffect(() => { loadLibraries() @@ -59,6 +60,34 @@ function App() { } } + const toggleLibrary = async (libraryName, isEnabled) => { + setTogglingLibrary(libraryName) + try { + const formData = new FormData() + formData.append('library_name', libraryName) + formData.append('is_enabled', isEnabled) + + const response = await fetch('/libraries/status', { + method: 'POST', + body: formData, + }) + + if (response.ok) { + // Refresh libraries list + await loadLibraries() + setToastMessage(`Library ${libraryName} ${isEnabled ? 'enabled' : 'disabled'}`) + setTimeout(() => setToastMessage(null), 3000) + } else { + const data = await response.json() + setError(data.detail || 'Failed to update library status') + } + } catch (err) { + setError('Failed to update library status: ' + err.message) + } finally { + setTogglingLibrary(null) + } + } + const showGPUToast = (message) => { if (toastTimerRef.current) { clearTimeout(toastTimerRef.current) @@ -134,11 +163,11 @@ function App() { setError(null) setResult(null) setTimeTaken(null) - + // Start timer const start = Date.now() setStartTime(start) - + // Update timer every 100ms if (timerIntervalRef.current) { clearInterval(timerIntervalRef.current) @@ -278,7 +307,7 @@ function App() { if (response.ok) { setCurrentComparisonTask(data.task) - + // Fetch content for each comparison const comparisonsWithContent = await Promise.all( data.comparisons.map(async (comp) => { @@ -294,7 +323,7 @@ function App() { return comp }) ) - + setComparisonComparisons(comparisonsWithContent) } } catch (err) { @@ -321,7 +350,7 @@ function App() { // Download single library result const response = await fetch(`/compare/${taskId}/content/${library}`) const data = await response.json() - + const content = typeof data.content === 'string' ? data.content : JSON.stringify(data.content, null, 2) const blob = new Blob([content], { type: outputFormat === 'json' ? 'application/json' : 'text/markdown' }) const url = URL.createObjectURL(blob) @@ -336,7 +365,7 @@ function App() { // Download all results as zip const response = await fetch(`/compare/${taskId}/download`) if (!response.ok) throw new Error('Download failed') - + const blob = await response.blob() const url = URL.createObjectURL(blob) const a = document.createElement('a') @@ -447,13 +476,12 @@ function App() { onDragOver={handleDragOver} onDragLeave={handleDragLeave} onDrop={handleDrop} - className={`border-2 border-dashed rounded-lg transition-all ${ - isDragging - ? 'border-blue-500 bg-blue-50 dark:bg-blue-950/20' - : selectedFile + className={`border-2 border-dashed rounded-lg transition-all ${isDragging + ? 'border-blue-500 bg-blue-50 dark:bg-blue-950/20' + : selectedFile ? 'border-green-300 dark:border-green-800 bg-green-50 dark:bg-green-950/20' : 'border-slate-300 dark:border-slate-700 hover:border-slate-400 dark:hover:border-slate-600' - }`} + }`} > Configuration - + {/* Library Selection */}
{/* Recent Comparisons History */} - seconds - + {/* Progress bar */}
Processing...
-
diff --git a/frontend/vite.config.js b/frontend/vite.config.js index a52ca07..1855f65 100644 --- a/frontend/vite.config.js +++ b/frontend/vite.config.js @@ -1,48 +1,48 @@ -import { defineConfig } from 'vite' -import react from '@vitejs/plugin-react' -import path from 'path' -import { fileURLToPath } from 'url' - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) +import { defineConfig } from "vite"; +import react from "@vitejs/plugin-react"; +import path from "path"; +import { fileURLToPath } from "url"; +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const baseUrl = "http://localhost:8000"; +console.log("Using backend URL:", baseUrl); export default defineConfig({ plugins: [react()], resolve: { alias: { - '@': path.resolve(__dirname, './src'), + "@": path.resolve(__dirname, "./src"), }, }, build: { - outDir: '../static', + outDir: "../static", emptyOutDir: true, }, server: { proxy: { - '/libraries': { - target: 'http://localhost:8000', + "/libraries": { + target: baseUrl, changeOrigin: true, }, - '/convert': { - target: 'http://localhost:8000', + "/convert": { + target: baseUrl, changeOrigin: true, }, - '/compare': { - target: 'http://localhost:8000', + "/compare": { + target: baseUrl, changeOrigin: true, }, - '/history': { - target: 'http://localhost:8000', + "/history": { + target: baseUrl, changeOrigin: true, }, - '/stats': { - target: 'http://localhost:8000', + "/stats": { + target: baseUrl, changeOrigin: true, }, - '/health': { - target: 'http://localhost:8000', + "/health": { + target: baseUrl, changeOrigin: true, }, }, }, -}) - +}); diff --git a/main.py b/main.py index 43bbf0a..f1aec86 100644 --- a/main.py +++ b/main.py @@ -38,11 +38,10 @@ UPLOAD_DIR = "uploads" os.makedirs(UPLOAD_DIR, exist_ok=True) -# Initialize factory (singleton pattern) -factory = OCRFactory() - # Initialize comparison services db_service = DatabaseService() +# Initialize factory (singleton pattern) with DB service +factory = OCRFactory(db_service) queue_manager = QueueManager(db_service) results_manager = ResultsManager() @@ -68,6 +67,30 @@ async def get_available_libraries(): """Get list of available conversion libraries""" return {"libraries": factory.list_all_converters()} +@app.post("/libraries/status") +async def set_library_status( + library_name: str = Form(...), + is_enabled: bool = Form(...) +): + """Enable or disable a library""" + try: + # Update DB + db_service.set_library_status(library_name, is_enabled) + + # Refresh factory + factory.refresh_converters() + + return { + "status": "success", + "library": library_name, + "enabled": is_enabled, + "message": f"Library {library_name} {'enabled' if is_enabled else 'disabled'}" + } + except Exception as e: + logger.error(f"Error setting library status: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + @app.post("/convert") async def convert_pdf( file: UploadFile = File(...), diff --git a/services/db_methods_snippet.py b/services/db_methods_snippet.py new file mode 100644 index 0000000..bf61a04 --- /dev/null +++ b/services/db_methods_snippet.py @@ -0,0 +1,25 @@ + + def get_enabled_libraries(self): + """Get list of enabled library names""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + cursor = conn.cursor() + + cursor.execute('SELECT library_name FROM user_libraries WHERE is_enabled = 1') + libraries = [row[0] for row in cursor.fetchall()] + + conn.close() + return libraries + + def set_library_status(self, library_name, is_enabled): + """Enable or disable a library""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + cursor = conn.cursor() + + cursor.execute(''' + INSERT INTO user_libraries (library_name, is_enabled) + VALUES (?, ?) + ON CONFLICT(library_name) DO UPDATE SET is_enabled = ? + ''', (library_name, 1 if is_enabled else 0, 1 if is_enabled else 0)) + + conn.commit() + conn.close() diff --git a/services/db_service.py b/services/db_service.py index 899f690..d348d00 100644 --- a/services/db_service.py +++ b/services/db_service.py @@ -49,10 +49,36 @@ def init_db(self): CONSTRAINT status_check CHECK (status IN ('pending', 'success', 'failed', 'timeout')) ) ''') + + # User-opted libraries table + cursor.execute(''' + CREATE TABLE IF NOT EXISTS user_libraries ( + library_name TEXT PRIMARY KEY, + is_enabled BOOLEAN DEFAULT 0 + ) + ''') # Create indexes for faster queries cursor.execute('CREATE INDEX IF NOT EXISTS idx_tasks_created ON tasks(created_at DESC)') cursor.execute('CREATE INDEX IF NOT EXISTS idx_comparisons_task ON comparisons(task_id)') + + # Seed initial libraries if empty + cursor.execute('SELECT COUNT(*) FROM user_libraries') + if cursor.fetchone()[0] == 0: + initial_libraries = [ + ('pymupdf4llm', 0), + ('markitdown', 0), + ('marker', 0), + ('docling', 0), + ('paddleocr', 0), + ('deepseekocr', 0), + ('pytesseract', 0), + ('unstructured', 0), + ] + cursor.executemany( + 'INSERT INTO user_libraries (library_name, is_enabled) VALUES (?, ?)', + initial_libraries + ) conn.commit() conn.close() @@ -201,6 +227,7 @@ def get_library_stats(self): conn.close() return stats + def delete_task(self, task_id): """Delete task and its comparisons""" conn = sqlite3.connect(self.db_path, timeout=30.0) @@ -214,3 +241,29 @@ def delete_task(self, task_id): conn.commit() conn.close() + def get_enabled_libraries(self): + """Get list of enabled library names""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + cursor = conn.cursor() + + cursor.execute('SELECT library_name FROM user_libraries WHERE is_enabled = 1') + libraries = [row[0] for row in cursor.fetchall()] + + conn.close() + return libraries + + def set_library_status(self, library_name, is_enabled): + """Enable or disable a library""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + cursor = conn.cursor() + + cursor.execute(''' + INSERT INTO user_libraries (library_name, is_enabled) + VALUES (?, ?) + ON CONFLICT(library_name) DO UPDATE SET is_enabled = ? + ''', (library_name, 1 if is_enabled else 0, 1 if is_enabled else 0)) + + conn.commit() + conn.close() + + diff --git a/services/ocrfactory.py b/services/ocrfactory.py index 878eb47..2a2afab 100644 --- a/services/ocrfactory.py +++ b/services/ocrfactory.py @@ -16,13 +16,37 @@ class OCRFactory: Implements singleton pattern for converter instances. """ - def __init__(self): + def __init__(self, db_service=None): self._converters: Dict[str, PDFConverter] = {} + # If no DB service provided, create one + if db_service is None: + from services.db_service import DatabaseService + self.db_service = DatabaseService() + else: + self.db_service = db_service + + self._register_default_converters() + + def refresh_converters(self): + """Reload converters from database settings""" + self._converters = {} self._register_default_converters() def _register_default_converters(self): - """Register all available converter implementations""" - converters = [ + """Register all available converter implementations based on DB settings""" + # Get enabled libraries from DB + try: + enabled_libraries = set(self.db_service.get_enabled_libraries()) + except Exception as e: + logger.error(f"Failed to load enabled libraries from DB: {e}") + + logger.info(f"Loading enabled libraries: {enabled_libraries}") + + if len(enabled_libraries) == 0: + logger.warning("No enabled libraries found in DB. Registering all converters.") + return + + all_converters = [ PyMuPDF4LLMConverter(), MarkItDownConverter(), MarkerConverter(), @@ -33,7 +57,11 @@ def _register_default_converters(self): UnstructuredConverter(), ] - for converter in converters: + for converter in all_converters: + # Skip if not enabled in DB + if converter.name not in enabled_libraries: + continue + if converter.available: self._converters[converter.name] = converter logger.info(f"Registered converter: {converter.name}") @@ -52,6 +80,11 @@ def list_available_converters(self) -> List[str]: def list_all_converters(self) -> List[Dict[str, bool]]: """List all converters with their availability status""" + + try: + enabled_libraries = set(self.db_service.get_enabled_libraries()) + except Exception as e: + logger.error(f"Failed to load enabled libraries from DB: {e}") all_converters = [ "pymupdf4llm", "markitdown", @@ -70,7 +103,8 @@ def list_all_converters(self) -> List[Dict[str, bool]]: result.append({ "name": name, "available": available, - "error": None if available else getattr(converter, "error_message", "Unavailable") + "error": None if available else getattr(converter, "error_message", "Unavailable"), + "enabled": name in enabled_libraries }) return result