Changes from all commits
28 commits
d1b16ef
Add translation
ChanJianHao Dec 22, 2024
8138bc7
Update frontend GUI
ChanJianHao Dec 22, 2024
2cc7924
Add support for more Whisper models
ChanJianHao Dec 22, 2024
7c79f10
Update requirements with fixed versions
ChanJianHao Dec 22, 2024
1c39070
Update console dimension
ChanJianHao Dec 22, 2024
7421f98
Update translation toggle check and window opacity
ChanJianHao Dec 22, 2024
c6021c3
Remove unused imports
ChanJianHao Dec 22, 2024
37a86f5
Update readme
ChanJianHao Dec 22, 2024
cee7fa7
Cleanup project folder
ChanJianHao Dec 22, 2024
058b320
Change icon from CC to S
ChanJianHao Dec 22, 2024
a729e60
Update README.md with troubleshooting on cublas64_12.dll
ChanJianHao Dec 22, 2024
4bf4a43
Update README.md
ChanJianHao Dec 22, 2024
647ee77
Change sampling size and reduce hallucinations via blacklist
ChanJianHao Dec 22, 2024
8795485
Update build portable to use hallucination file
ChanJianHao Dec 22, 2024
5fd5c3f
Update hallucination blacklist logic
ChanJianHao Dec 22, 2024
2c69d35
Cleanup code in transcriber
ChanJianHao Dec 23, 2024
5e4226b
Add sharper icon.ico with transparent background
ChanJianHao Dec 23, 2024
ebd0864
Refactor code for hallucination filter
ChanJianHao Dec 24, 2024
b22b648
Add "Filter Hallucinations" and "Store Output" options
ChanJianHao Dec 24, 2024
2d95978
Add filter hallucination and store output as options
ChanJianHao Dec 24, 2024
336bc36
Improve hallucination filter logic and add print statements
ChanJianHao Dec 24, 2024
9ee18c5
Overall refactor of code
ChanJianHao Dec 29, 2024
c15eb93
Update screenshot for demo
ChanJianHao Dec 29, 2024
4a48dbb
Update tooltip for language selection
ChanJianHao Dec 29, 2024
6ca0057
Make language optional, remove turbo, rename hallucination file
ChanJianHao Jan 2, 2025
55821a2
Add line to hallucination filter
ChanJianHao Jan 2, 2025
4d9cbe5
Add hallucinations
ChanJianHao Jan 6, 2025
e32f940
Justify center subtitles text
ChanJianHao Jan 6, 2025
1 change: 1 addition & 0 deletions .gitignore
@@ -173,3 +173,4 @@ nvidia_dependencies_linux.zip
1.29.zip
config.ini
build_portable_example.py
.idea/
Binary file added DemoMedia/demo1.png
17 changes: 10 additions & 7 deletions README.md
@@ -1,32 +1,35 @@
# System Captioner

Generates and shows real-time captions by listening to your Windows PC's audio. Makes digital content more accessible for those who are deaf or hard of hearing, aids language learning, and more.
This application generates and displays real-time captions/subtitles by capturing audio output from a Windows PC. It enhances accessibility for individuals who are deaf or hard of hearing, facilitates language learning, and offers other potential uses. For example, with translation enabled it supports near real-time communication during foreign-language voice calls and on-the-fly subtitling of foreign-language media such as anime, without pre-processing video files and without being tied to any single application.


https://github.com/user-attachments/assets/7315ab7c-fe30-4c37-91aa-60bb32979338

![Main User Interface](DemoMedia/demo1.png)

## How it works

1. Captures system audio in real-time through Windows audio loopback using PyAudioWPatch
3. Locally transcribes the recordings using faster-whisper
4. Displays the transcriptions as captions in a overlay window that remains always on top
2. Locally transcribes or translates the recordings using faster-whisper
3. Displays the transcriptions/translations as captions in an overlay window that remains always on top


Features include language auto-detection, a user-friendly GUI, a draggable captions box, and an intelligent mode that shows captions only when speech is detected.
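
For a rough idea of how the capture-transcribe-display pipeline above fits together, here is a minimal sketch that grabs a few seconds of system audio through PyAudioWPatch's WASAPI loopback and transcribes it locally with faster-whisper. It is illustrative only: the chunk length, model size, temp-file name, and device-selection details are assumptions, not the app's actual implementation.

```python
# Illustrative sketch only: chunk length, model size, and file name are
# assumptions, not the app's actual defaults.
import wave

import pyaudiowpatch as pyaudio
from faster_whisper import WhisperModel

CHUNK_SECONDS = 3                  # assumed capture window
WAV_PATH = "loopback_chunk.wav"    # hypothetical temporary file

p = pyaudio.PyAudio()

# 1. Find the WASAPI loopback device that mirrors the default speakers.
wasapi = p.get_host_api_info_by_type(pyaudio.paWASAPI)
speakers = p.get_device_info_by_index(wasapi["defaultOutputDevice"])
if not speakers.get("isLoopbackDevice"):
    for dev in p.get_loopback_device_info_generator():
        if speakers["name"] in dev["name"]:
            speakers = dev
            break

rate = int(speakers["defaultSampleRate"])
channels = speakers["maxInputChannels"]

# 2. Record a short chunk of whatever the PC is currently playing.
stream = p.open(format=pyaudio.paInt16, channels=channels, rate=rate,
                input=True, input_device_index=speakers["index"])
frames = [stream.read(rate) for _ in range(CHUNK_SECONDS)]
stream.stop_stream()
stream.close()
p.terminate()

with wave.open(WAV_PATH, "wb") as wf:
    wf.setnchannels(channels)
    wf.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
    wf.setframerate(rate)
    wf.writeframes(b"".join(frames))

# 3. Transcribe the chunk locally with faster-whisper.
model = WhisperModel("base", device="cuda", compute_type="float16")
segments, _ = model.transcribe(WAV_PATH)
print(" ".join(seg.text.strip() for seg in segments))
```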

By default, the app runs on and requires **nVidia CUDA** (dependencies included). The app should work with RTX 2000, 3000 and 4000 series cards. Turning off GPU mode will make the app run on CPU; start with the smallest model and settle with the model that's stable.
By default, the app runs on and requires **NVIDIA CUDA** (dependencies included). It should work with RTX 2000, 3000, and 4000 series cards. Turning off GPU mode makes the app run on the CPU; start with the smallest model and settle on the largest model that remains stable.

## Installation (Windows)

1. Download the latest standalone .zip (currently 1.38) from the releases section and extract all files.

2. Run SystemCaptioner.exe and follow the instructions.

Alternatively build the standalone executable yourself using build_portable.py. You will need the nvidia_dependencies folder from the standalone .zip (/SystemCaptioner/Controller/_internal/nvidia_dependencies) and install all the dependencies using requirements.txt inside a venv first.
Alternatively, build the standalone executable yourself using `build_portable.py`. You will need the nvidia_dependencies folder from the standalone .zip (/SystemCaptioner/Controller/_internal/nvidia_dependencies), and you should first install all the dependencies from requirements.txt inside a venv. After building, an extra nvidia_dependencies folder is generated inside dist/SystemCaptioner/; delete it (a cleanup sketch follows below).
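
The cleanup sketch below shows one way to remove that leftover folder after the build. It is a hypothetical helper, not part of build_portable.py, and the dist path is assumed from the description above.

```python
# Hypothetical post-build cleanup: remove the duplicate nvidia_dependencies
# folder left in dist/SystemCaptioner/ (path assumed from the note above).
import os
import shutil

leftover = os.path.join("dist", "SystemCaptioner", "nvidia_dependencies")
if os.path.isdir(leftover):
    shutil.rmtree(leftover)
    print(f"Removed leftover folder: {leftover}")
```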

## Limitations/Troubleshooting

‼️ Occasionally, the app can take a long time to start up/load a model. If there are no clear errors in console, wait for at least a few mins or try stopping and starting model again.
‼️ Occasionally, the app can take a long time to start up or load a model. If there are no clear errors in the console, wait at least a few minutes or try stopping and starting the model again.

If you experienced any issues with System Captioner, let me know in the 'Issues' page of this repo! Include the Console window log if possible.
⚠️ If the console shows a `Library cublas64_12.dll is not found or cannot be loaded` error and no translation appears, copy `cublasLt64_11.dll` in the `\Controller\_internal\nvidia_dependencies` folder and rename the copy to `cublasLt64_12.dll` (a small helper sketch follows below).
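
The snippet below sketches that copy-and-rename step as a convenience; it assumes it is run from the System Captioner install directory and is not part of the app itself.

```python
# Hypothetical helper for the cublas fix above: duplicate cublasLt64_11.dll as
# cublasLt64_12.dll inside the bundled nvidia_dependencies folder.
# Assumes the script is run from the System Captioner install directory.
import os
import shutil

deps = os.path.join("Controller", "_internal", "nvidia_dependencies")
src = os.path.join(deps, "cublasLt64_11.dll")
dst = os.path.join(deps, "cublasLt64_12.dll")
if os.path.exists(src) and not os.path.exists(dst):
    shutil.copy2(src, dst)
    print(f"Created {dst}")
```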

If you experience any issues with System Captioner, let us know on the 'Issues' page of this repo! Include the Console window log if possible.
34 changes: 20 additions & 14 deletions build_portable.py
@@ -6,22 +6,23 @@
def build_portable():
    # Get the current directory
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # Define paths
    dist_path = os.path.join(current_dir, 'dist')
    build_path = os.path.join(current_dir, 'build')
    nvidia_deps_path = os.path.join(current_dir, 'nvidia_dependencies')
    icon_path = os.path.join(current_dir, 'icon.ico')

    hallucinations_file = os.path.join(current_dir, 'filter_hallucinations.txt')

    # Get faster_whisper assets path
    faster_whisper_path = os.path.dirname(faster_whisper.__file__)
    assets_path = os.path.join(faster_whisper_path, 'assets')

    # Clean previous builds
    for path in [dist_path, build_path]:
        if os.path.exists(path):
            shutil.rmtree(path)

    # PyInstaller configuration for main.py
    PyInstaller.__main__.run([
        'main.py',
@@ -60,7 +61,7 @@ def build_portable():
        '--collect-all=faster_whisper',
        '--collect-all=customtkinter',
    ])

    # PyInstaller configuration for controller.py
    PyInstaller.__main__.run([
        'controller.py',
@@ -92,32 +93,31 @@ def build_portable():
        '--collect-all=torch',
        '--collect-all=faster_whisper',
    ])

    # Copy NVIDIA dependencies if they exist
    if os.path.exists(nvidia_deps_path):
        target_nvidia_path = os.path.join(dist_path, 'SystemCaptioner', 'nvidia_dependencies')
        if os.path.exists(target_nvidia_path):
            shutil.rmtree(target_nvidia_path)
        shutil.copytree(nvidia_deps_path, target_nvidia_path)
        print("NVIDIA dependencies copied successfully")



    print("Build completed successfully!")

    # Post-build steps
    try:
        dist_system_captioner = os.path.join(dist_path, 'SystemCaptioner')
        dist_controller = os.path.join(dist_path, 'Controller')
        controller_internal = os.path.join(dist_system_captioner, 'Controller', '_internal')

        # Move Controller folder inside SystemCaptioner
        if os.path.exists(dist_controller):
            target_controller = os.path.join(dist_system_captioner, 'Controller')
            if os.path.exists(target_controller):
                shutil.rmtree(target_controller)
            shutil.move(dist_controller, target_controller)
            print("Controller folder moved successfully")

        # Copy NVIDIA dependencies to Controller/_internal
        nvidia_src = os.path.join(dist_system_captioner, 'nvidia_dependencies')
        if os.path.exists(nvidia_src):
@@ -126,16 +126,22 @@ def build_portable():
                shutil.rmtree(nvidia_dest)
            shutil.copytree(nvidia_src, nvidia_dest)
            print("NVIDIA dependencies copied to Controller/_internal successfully")

        # Copy icon.ico from _internal to root
        icon_src = os.path.join(dist_system_captioner, '_internal', 'icon.ico')
        icon_dest = os.path.join(dist_system_captioner, 'icon.ico')
        if os.path.exists(icon_src):
            shutil.copy2(icon_src, icon_dest)
            print("icon.ico copied to root successfully")


        # Copy filter_hallucinations.txt to root folder of System Captioner
        if os.path.exists(hallucinations_file):
            hallucinations_dest = os.path.join(dist_system_captioner, 'filter_hallucinations.txt')
            shutil.copy2(hallucinations_file, hallucinations_dest)
            print("filter_hallucinations.txt copied to root successfully")

        print("Post-build steps completed successfully!")

    except Exception as e:
        print(f"Error during post-build steps: {e}")

9 changes: 5 additions & 4 deletions console.py
@@ -1,15 +1,14 @@
import customtkinter as ctk
from tkinter import scrolledtext
import threading
import queue
import sys


class ConsoleWindow(ctk.CTkToplevel):
    def __init__(self, console_queue, master=None, icon_path=None):
        super().__init__(master)
        self.title("Console Output")
        self.geometry("600x400")
        self.geometry("900x450")

        # Set the icon for the console window
        if icon_path:
            self.iconbitmap(icon_path)
@@ -49,8 +48,10 @@ def display_message(self, message):
        self.text_area.configure(state='disabled')
        self.text_area.yview(ctk.END)


class QueueWriter:
    """A writer object that redirects writes to a queue."""

    def __init__(self, log_queue):
        self.log_queue = log_queue

38 changes: 23 additions & 15 deletions controller.py
@@ -2,43 +2,42 @@
import sys
import ctypes
import threading
import recorder
import transcriber
from gui import SubtitleGUI
import queue
import time
import argparse
import configparser

# Update the import statement for the GUI
from gui import SubtitleGUI  # No change needed
# Import necessary modules from the project
import recorder
import transcriber
from gui import SubtitleGUI

# Change the hardcoded path to a relative path
# Setup CUDA DLL path
cuda_dll_path = os.path.join(os.path.dirname(__file__), "nvidia_dependencies")
os.environ['PATH'] = f"{cuda_dll_path}{os.pathsep}{os.environ['PATH']}"
sys.path.append(cuda_dll_path)

# Explicitly add the DLL to the DLL search path
# Add the DLL to the DLL search path
os.add_dll_directory(cuda_dll_path)

# Attempt to load the CUDA DLL
try:
    ctypes.CDLL(os.path.join(cuda_dll_path, "cudnn_ops_infer64_8.dll"))
    print("Successfully loaded cudnn_ops_infer64_8.dll", flush=True)
except Exception as e:
    print(f"Error loading cudnn_ops_infer64_8.dll: {e}", flush=True)

def start_recording():
def start_recording(device_index=None):
    """Start the audio recording process."""
    device_index = args.device_index if hasattr(args, 'device_index') else None
    recorder.record_audio(device_index)

def start_transcription(device):
def start_transcription(device, args):
    """Start the audio transcription process."""
    transcriber.monitor_audio_file(
        transcriber.AUDIO_INPUT_DIR,
        transcriber.TRANSCRIPTION_OUTPUT,
        check_interval=0.2,
        device=device
        device=device,
        args=args
    )

def start_gui(update_queue, intelligent_mode):
@@ -50,9 +49,17 @@ def start_gui(update_queue, intelligent_mode):
parser = argparse.ArgumentParser(description="TranscriberX Application")
parser.add_argument('--intelligent', action='store_true', help='Enable intelligent mode')
parser.add_argument('--cuda', action='store_true', help='Enable CUDA for transcription')
parser.add_argument('--model', type=str, choices=['tiny', 'base', 'small', 'medium', 'large'],
parser.add_argument('--model', type=str, choices=['tiny.en', 'tiny', 'base.en', 'base', 'small.en', 'small', 'medium.en', 'medium',
                                                  'large-v1', 'large-v2', 'large-v3', 'large', 'distil-large-v2', 'distil-medium.en',
                                                  'distil-small.en', 'distil-large-v3'],
                    help='Select the model size for transcription')
parser.add_argument('--device-index', type=int, help='Audio device index for recording')
parser.add_argument('--transcription-timeout', type=int, default=5, help='Transcription timeout in seconds')
parser.add_argument('--workers', type=int, default=4, help='Number of worker threads')
parser.add_argument('--translation-enabled', action='store_true', help='Enable translation')
parser.add_argument('--source-language', type=str, default='en', help='Source language for transcription')
parser.add_argument('--filter-hallucinations', action='store_true', help='Filter hallucinations using filter_hallucinations.txt')
parser.add_argument('--store-output', action='store_true', help='Store transcription output in transcriptions.txt')
args = parser.parse_args()

# Update config with the selected model
@@ -70,8 +77,8 @@ def start_gui(update_queue, intelligent_mode):
device = "cuda" if args.cuda else "cpu"

# Create threads for recording, transcription, and GUI
recording_thread = threading.Thread(target=start_recording, daemon=True)
transcription_thread = threading.Thread(target=start_transcription, args=(device,), daemon=True)
recording_thread = threading.Thread(target=start_recording, args=(args.device_index,), daemon=True)
transcription_thread = threading.Thread(target=start_transcription, args=(device, args), daemon=True)
gui_thread = threading.Thread(target=start_gui, args=(transcription_queue, args.intelligent), daemon=True)

# Start the threads
@@ -85,3 +92,4 @@ def start_gui(update_queue, intelligent_mode):
        time.sleep(1)
except KeyboardInterrupt:
    print("Exiting program.", flush=True)
