From 59ae65a8de5b6b74bfc811b4ec2e58056e0a79b3 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Sun, 17 Nov 2024 23:57:26 +0000
Subject: [PATCH 01/66] Nov 17, 2024, 3:57 PM

---
 .gitignore                                |  27 +++
 sample-shrinker-python/README.md          | 130 +++++++++++
 sample-shrinker-python/requirements.txt   |   4 +
 sample-shrinker-python/sample-shrinker.py | 253 ++++++++++++++++++++++
 4 files changed, 414 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 sample-shrinker-python/README.md
 create mode 100644 sample-shrinker-python/requirements.txt
 create mode 100644 sample-shrinker-python/sample-shrinker.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..061bfe6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+
+# Ignore macOS and Windows metadata files
+.DS_Store
+._*
+Thumbs.db
+
+# Ignore Python bytecode
+__pycache__/
+*.py[cod]
+
+# Ignore logs and backup files
+*.log
+_backup/
+
+# Ignore output files like spectrograms
+*.png
+
+# Ignore directories created by the script
+sample-shrinker-python/_backup/
+sample-shrinker-python/*.log
+sample-shrinker-python/*.png
+
+# Virtual environment files
+venv/
+env/
+.venv
+sample-shrinker_venv/
\ No newline at end of file
diff --git a/sample-shrinker-python/README.md b/sample-shrinker-python/README.md
new file mode 100644
index 0000000..44c5d3c
--- /dev/null
+++ b/sample-shrinker-python/README.md
@@ -0,0 +1,130 @@
+
+# Sample Shrinker
+
+A Python script to conditionally batch-convert audio samples into minimal `.wav` files, based on target criteria. This script is useful for saving storage space and reducing the I/O stress during simultaneous real-time streaming of multiple `.wav` files on devices like the Dirtywave M8 tracker.
+
+If you have directories full of 24/32-bit stereo `.wav` files or stereo samples with effectively mono content, this script can reclaim wasted storage space and reduce I/O stress on your SD card. It can also detect if the content of a stereo sample is actually mono and convert it automatically!
+
+## Features
+- **Conditional Conversion**: Only converts samples that don't meet the target criteria (bit depth, channels, etc.).
+- **Auto-Mono**: Automatically convert stereo samples to mono if the content is effectively mono, with a configurable threshold.
+- **Backup and Spectrogram Generation**: Converted files are backed up (unless disabled) and spectrograms of old vs. new files are generated.
+- **Pre-Normalization**: Optionally normalize samples before downsampling the bit depth to preserve dynamic range.
+- **Parallel Processing**: Use the `-j` option to process multiple files in parallel for faster conversions.
+
+## Requirements
+
+- Python 3.10 or later
+- `pydub`, `librosa`, `matplotlib`, `soundfile` (install with `pip`)
+- `ffmpeg` or `libav` installed for `pydub`
+
+Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+You will also need `ffmpeg`:
+```bash
+# macOS with Homebrew
+brew install ffmpeg
+
+# Ubuntu/Debian
+sudo apt install ffmpeg
+```
+
+## Usage
+
+```bash
+python sample-shrinker.py [options] FILE|DIRECTORY ...
+```
+
+### Basic Example:
+```bash
+python sample-shrinker.py directory_of_samples/
+```
+
+This will:
+- Convert samples in place with a target bit depth of 16 and stereo channels unchanged.
+- Back up the original files in a parallel `_backup/` directory.
+- Generate `.png` spectrograms comparing old and new files.
+
+### Options:
+- `-b BIT_DEPTH`: Set the target bit depth (default: 16). Samples will only be downsampled unless `-B` is set.
+- `-B MIN_BIT_DEPTH`: Set a minimum bit depth. This will upsample any samples below the minimum.
+- `-c CHANNELS`: Set the target number of output channels (default: 2). For mono, use `-c 1`.
+- `-r SAMPLERATE`: Set the target sample rate (default: 44100 Hz).
+- `-R MIN_SAMPLERATE`: Set a minimum sample rate. Samples below this will be upsampled.
+- `-a`: Automatically convert stereo samples to mono if they are effectively mono.
+- `-A DB_THRESHOLD`: Set the auto-mono threshold in dB (default: `-95.5`). Only takes effect together with `-a`.
+- `-p`: Pre-normalize samples before downsampling bit depth.
+- `-S`: Skip generating spectrogram files.
+- `-d BACKUP_DIR`: Set a directory to store backups. Use `-d -` to disable backups and spectrogram generation.
+- `-l`: List files and preview changes without converting.
+- `-n`: Dry run—log actions without converting any files.
+- `-j JOBS`: Process files in parallel with multiple jobs (default: 1).
+- `-v`: Increase verbosity.
+
+## Examples
+
+### Convert a Directory with Default Settings
+```bash
+python sample-shrinker.py my_samples/
+```
+- Convert samples to 16-bit with channels left unchanged.
+- Back up the original files under `_backup/`.
+- Generate spectrogram `.png` files for comparison.
+
+### Convert to Mono Automatically for Effectively Mono Samples
+```bash
+python sample-shrinker.py -a my_samples/
+```
+- Automatically convert stereo samples to mono if they are effectively mono (i.e., the difference between the channels is below the threshold).
+
+### Preview Changes Without Modifying Files
+```bash
+python sample-shrinker.py -l -a -A -80 my_samples/
+```
+- Lists all files and shows which ones would be changed without actually modifying them. The threshold for auto-mono is set to -80 dB.
+
+### Convert and Skip Backups
+```bash
+python sample-shrinker.py -d - my_samples/
+```
+- Converts files but does not create backups or generate spectrograms.
+
+### Pre-Normalize Before Downsampling
+```bash
+python sample-shrinker.py -p my_samples/
+```
+- Normalize the audio before downsampling the bit depth to preserve as much dynamic range as possible.
+
+### Process Files in Parallel
+```bash
+python sample-shrinker.py -j 10 my_samples/
+```
+- Process up to 10 files at the same time for faster batch conversion.
+
+## Output Example:
+
+```bash
+Processing file: /Volumes/Untitled/Samples/wii sports/sound effects/Baseball/Sample_0028.wav
+/Volumes/Untitled/Samples/wii sports/sound effects/Baseball/Sample_0028.wav [UNCHANGED]
+Processing file: /Volumes/Untitled/Samples/wii sports/sound effects/Boxing/Sample_0029.wav
+/Volumes/Untitled/Samples/wii sports/sound effects/Boxing/Sample_0029.wav [CHANGED]: sample rate 48000 -> 44100
+Processing file: /Volumes/Untitled/Samples/wii sports/sound effects/Boxing/Sample_0030.wav
+/Volumes/Untitled/Samples/wii sports/sound effects/Boxing/Sample_0030.wav [CHANGED]: auto-mono
+```
+
+In the updated output format:
+- The script logs each file being processed with the `Processing file:` prefix.
+- After processing, each file will either be marked as `[UNCHANGED]` or `[CHANGED]` depending on whether any modifications (bit depth, sample rate, or channels) were made.
+- If changes are made, the specific adjustments (e.g., `sample rate 48000 -> 44100`) will be displayed.
+  
+### Additional Details:
+- The `[CHANGED]` notation follows files that were modified.
+- `[UNCHANGED]` appears for files that meet the target criteria and required no modifications.
+- **Changes made**:
+  - Sample rate conversions (e.g., `sample rate 48000 -> 44100`).
+  - Bit depth reductions (e.g., `bit depth 32 -> 16`).
+  - Channel conversions (e.g., stereo to mono).
+- Verbose output (`-v`) will print additional information such as ongoing file processing.
diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
new file mode 100644
index 0000000..4a880fc
--- /dev/null
+++ b/sample-shrinker-python/requirements.txt
@@ -0,0 +1,4 @@
+librosa==0.10.2.post1
+matplotlib==3.9.2
+numpy==2.1.2
+pydub==0.25.1
diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
new file mode 100644
index 0000000..c283fad
--- /dev/null
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -0,0 +1,253 @@
+import os
+import shutil
+import argparse
+import soundfile as sf
+from pydub import AudioSegment
+import librosa
+import matplotlib.pyplot as plt
+import numpy as np
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import concurrent.futures 
+
+def usage_intro():
+    return """
+Conditionally batch-converts audio samples into minimal .wav files.
+
+Each DIRECTORY is recursively searched for audio files to process, based on their extension (configured with -x). Any FILE specified directly will be processed (regardless of its extension).
+
+If a sample does not already meet the target BIT_DEPTH or CHANNELS, it will be converted in place and the original will be backed up to a parallel directory structure.
+
+Upon conversion, spectrogram .png files are generated alongside the backed-up original file to compare the original vs new audio files (disable with -S).
+
+Examples:
+    Recursively convert samples under 'sample_dir/' using the default settings:
+        $ sample-shrinker.py sample_dir/
+    Convert samples down to 8-bit, mono:
+        $ sample-shrinker.py -c 1 -b 8 sample_dir/
+    Auto-convert stereo samples to mono:
+        $ sample-shrinker.py -a sample_dir/
+    """
+
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description="Batch convert audio files.")
+    parser.add_argument('files', nargs='+', help='Files or directories to process')
+    parser.add_argument('-b', '--bitdepth', type=int, default=16, help='Target bit depth (8, 16, 24)')
+    parser.add_argument('-B', '--min_bitdepth', type=int, help='Minimum bit depth (8, 16, 24)')
+    parser.add_argument('-c', '--channels', type=int, default=2, help='Target number of channels (1=mono, 2=stereo)')
+    parser.add_argument('-r', '--samplerate', type=int, default=44100, help='Target sample rate')
+    parser.add_argument('-R', '--min_samplerate', type=int, help='Minimum sample rate')
+    parser.add_argument('-x', '--ext', default='wav', help='File extension to search for (default: wav)')
+    parser.add_argument('-a', '--auto_mono', action='store_true', help='Automatically convert stereo samples to mono')
+    parser.add_argument('-A', '--auto_mono_threshold', type=float, default=-95.5, help='Auto-mono threshold dB')
+    parser.add_argument('-S', '--skip_spectrograms', action='store_true', help='Skip generating spectrogram files')
+    parser.add_argument('-d', '--backup_dir', default="_backup", help='Directory to store backups (default: _backup)')
+    parser.add_argument('-p', '--pre_normalize', action='store_true', help='Pre-normalize before downsampling bit-depth')
+    parser.add_argument('-l', '--list', action='store_true', help='List files without converting')
+    parser.add_argument('-n', '--dry_run', action='store_true', help='Log actions without converting')
+    parser.add_argument('-j', '--jobs', type=int, default=1, help='Number of parallel jobs (default: 1)')
+    parser.add_argument('-v', '--verbose', action='store_true', help='Increase verbosity')
+    
+    return parser.parse_args()
+
+def delete_resource_forks(directory):
+    """Recursively find and delete all '._' resource fork files in the directory."""
+    for root, dirs, files in os.walk(directory):
+        for file in files:
+            if file.startswith("._"):
+                file_path = os.path.join(root, file)
+                print(f"Deleting resource fork file: {file_path}")
+                os.remove(file_path)
+
+def reencode_audio(file_path):
+    """Re-encode audio file to PCM 16-bit if it has a different encoding."""
+    try:
+        with sf.SoundFile(file_path) as f:
+            print(f"Audio encoding: {f.format}, subtype: {f.subtype}, channels: {f.channels}")
+            if f.subtype != 'PCM_16':
+                # If the file is not PCM 16, re-save it as PCM_16
+                data, samplerate = sf.read(file_path)
+                temp_output = file_path.replace(os.path.splitext(file_path)[1], "_reencoded.wav")
+                sf.write(temp_output, data, samplerate, subtype='PCM_16')
+                print(f"File re-encoded to PCM_16: {file_path} -> {temp_output}")
+                return temp_output
+    except Exception as e:
+        print(f"Error re-encoding {file_path}: {e}")
+    return None
+
+def process_audio(file_path, args, dry_run=False):
+    """Main function to process audio files based on arguments."""
+    try:
+        print(f"Processing file: {file_path}")  # Debug logging to trace progress
+        audio = AudioSegment.from_file(file_path)
+        modified = False
+        change_reason = []
+
+        # Check if we need to convert the channels
+        if audio.channels > args.channels:
+            change_reason.append("channels")
+            if not dry_run:
+                audio = audio.set_channels(args.channels)
+            modified = True
+
+        # Auto-mono logic: convert stereo to mono if it is effectively mono
+        if args.auto_mono and audio.channels == 2:
+            mono_candidate = check_effectively_mono(audio, args.auto_mono_threshold)
+            if mono_candidate:
+                change_reason.append("auto-mono")
+                if not dry_run:
+                    audio = audio.set_channels(1)
+                modified = True
+
+        # Pre-normalize before downsampling bit depth if necessary
+        if args.pre_normalize:
+            change_reason.append("pre-normalize")
+            if not dry_run:
+                audio = audio.apply_gain(-audio.max_dBFS)
+            modified = True
+
+        # Check if we need to convert the bit depth
+        if audio.sample_width * 8 > args.bitdepth:
+            change_reason.append(f"bit depth {audio.sample_width * 8} -> {args.bitdepth}")
+            if not dry_run:
+                audio = audio.set_sample_width(args.bitdepth // 8)
+            modified = True
+
+        # Sample rate conversion logic: Downsample only
+        if audio.frame_rate > args.samplerate:
+            change_reason.append(f"sample rate {audio.frame_rate} -> {args.samplerate}")
+            if not dry_run:
+                audio = audio.set_frame_rate(args.samplerate)
+            modified = True
+        elif args.min_samplerate and audio.frame_rate < args.min_samplerate:
+            # Only upsample if the user specifies a minimum sample rate
+            change_reason.append(f"sample rate {audio.frame_rate} -> {args.min_samplerate}")
+            if not dry_run:
+                audio = audio.set_frame_rate(args.min_samplerate)
+            modified = True
+
+        if modified:
+            print(f"{file_path} [CHANGED]: {', '.join(change_reason)}")
+            if not dry_run:
+                # Backup the original file if required
+                if args.backup_dir != "-":
+                    backup_path = os.path.join(args.backup_dir, os.path.basename(file_path))
+                    os.makedirs(os.path.dirname(backup_path), exist_ok=True)
+                    shutil.copy(file_path, backup_path)
+
+                # Export the converted audio file
+                output_file = file_path.replace(os.path.splitext(file_path)[1], ".wav")
+                audio.export(output_file, format="wav")
+
+                # Generate spectrogram if enabled
+                if not args.skip_spectrograms:
+                    generate_spectrogram(file_path, output_file, args.backup_dir)
+        else:
+            print(f"{file_path} [UNCHANGED]")
+
+    except Exception as e:
+        print(f"Error processing {file_path}: {e}")
+
+        # Try re-encoding the file if ffmpeg failed
+        reencoded_file = reencode_audio(file_path)
+        if reencoded_file:
+            try:
+                # Retry the process with the re-encoded file
+                process_audio(reencoded_file, args, dry_run)
+            except Exception as retry_error:
+                print(f"Failed to process the re-encoded file {reencoded_file}: {retry_error}")
+
+def check_effectively_mono(audio, threshold_dB):
+    """Check if a stereo file is effectively mono."""
+    left_channel = audio.split_to_mono()[0]
+    right_channel = audio.split_to_mono()[1].invert_phase()
+
+    difference = left_channel.overlay(right_channel)
+    peak_diff_db = difference.max_dBFS
+    return peak_diff_db < threshold_dB
+
+def generate_spectrogram(original_file, new_file, backup_dir):
+    """Generate and save spectrograms for the original and new files."""
+    y_old, sr_old = librosa.load(original_file, sr=None)
+    y_new, sr_new = librosa.load(new_file, sr=None)
+
+    # Spectrogram for original file
+    plt.figure(figsize=(10, 4))
+    D_old = librosa.amplitude_to_db(np.abs(librosa.stft(y_old)), ref=np.max)
+    librosa.display.specshow(D_old, sr=sr_old, x_axis='time', y_axis='log')
+    plt.colorbar(format='%+2.0f dB')
+    plt.title(f'Spectrogram of {original_file}')
+    old_spectrogram_path = os.path.join(backup_dir, os.path.basename(original_file) + ".old.png")
+    plt.savefig(old_spectrogram_path)
+    plt.close()
+
+    # Spectrogram for new file
+    plt.figure(figsize=(10, 4))
+    D_new = librosa.amplitude_to_db(np.abs(librosa.stft(y_new)), ref=np.max)
+    librosa.display.specshow(D_new, sr=sr_new, x_axis='time', y_axis='log')
+    plt.colorbar(format='%+2.0f dB')
+    plt.title(f'Spectrogram of {new_file}')
+    new_spectrogram_path = os.path.join(backup_dir, os.path.basename(new_file) + ".new.png")
+    plt.savefig(new_spectrogram_path)
+    plt.close()
+
+def list_files(args, file_list):
+    """Prints file summary and actions without performing them."""
+    for file_path in file_list:
+        print(f"Previewing: {file_path}")
+
+def collect_files(args):
+    """Collect all files from provided directories and files, skipping resource fork files."""
+    file_list = []
+    for path in args.files:
+        if os.path.isdir(path):
+            for root, dirs, files in os.walk(path):
+                for file in files:
+                    if file.endswith(f".{args.ext}") and not file.startswith("._"):
+                        file_list.append(os.path.join(root, file))
+        elif os.path.isfile(path):
+            if not os.path.basename(path).startswith("._"):
+                file_list.append(path)
+    return file_list
+
+def run_in_parallel(file_list, args):
+    """Run the audio processing in parallel."""
+    try:
+        with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+            futures = {executor.submit(process_audio, file, args): file for file in file_list}
+            for future in concurrent.futures.as_completed(futures):
+                try:
+                    result = future.result()  # Get the result of the future (processed file)
+                except Exception as exc:
+                    file = futures[future]
+                    print(f"File {file} generated an exception: {exc}")
+    except KeyboardInterrupt:
+        print("Received KeyboardInterrupt, attempting to cancel all threads...")
+        executor.shutdown(wait=False, cancel_futures=True)
+        raise
+
+def main():
+    args = parse_args()
+
+    # Ensure that at least one file or directory is provided
+    if not args.files:
+        print(usage_intro())
+        return
+
+    # Delete all '._' files before processing anything
+    for path in args.files:
+        if os.path.isdir(path):
+            delete_resource_forks(path)
+
+    # Collect the files to process
+    file_list = collect_files(args)
+
+    if args.dry_run or args.list:
+        list_files(args, file_list)
+        for file in file_list:
+            process_audio(file, args, dry_run=True)
+    else:
+        run_in_parallel(file_list, args)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From cb0b4cc4bcfac6454c52924e71577d3768c4d02a Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:11:47 +0000
Subject: [PATCH 02/66] Nov 17, 2024, 4:11 PM

---
 sample-shrinker-python/sample-shrinker.py | 174 ++++++++++++++++------
 1 file changed, 130 insertions(+), 44 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index c283fad..3e66d66 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1,13 +1,15 @@
+import argparse
+import concurrent.futures
 import os
 import shutil
-import argparse
-import soundfile as sf
-from pydub import AudioSegment
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
 import librosa
 import matplotlib.pyplot as plt
 import numpy as np
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import concurrent.futures 
+import soundfile as sf
+from pydub import AudioSegment
+
 
 def usage_intro():
     return """
@@ -28,28 +30,78 @@ def usage_intro():
         $ sample-shrinker.py -a sample_dir/
     """
 
+
 def parse_args():
     """Parse command line arguments."""
     parser = argparse.ArgumentParser(description="Batch convert audio files.")
-    parser.add_argument('files', nargs='+', help='Files or directories to process')
-    parser.add_argument('-b', '--bitdepth', type=int, default=16, help='Target bit depth (8, 16, 24)')
-    parser.add_argument('-B', '--min_bitdepth', type=int, help='Minimum bit depth (8, 16, 24)')
-    parser.add_argument('-c', '--channels', type=int, default=2, help='Target number of channels (1=mono, 2=stereo)')
-    parser.add_argument('-r', '--samplerate', type=int, default=44100, help='Target sample rate')
-    parser.add_argument('-R', '--min_samplerate', type=int, help='Minimum sample rate')
-    parser.add_argument('-x', '--ext', default='wav', help='File extension to search for (default: wav)')
-    parser.add_argument('-a', '--auto_mono', action='store_true', help='Automatically convert stereo samples to mono')
-    parser.add_argument('-A', '--auto_mono_threshold', type=float, default=-95.5, help='Auto-mono threshold dB')
-    parser.add_argument('-S', '--skip_spectrograms', action='store_true', help='Skip generating spectrogram files')
-    parser.add_argument('-d', '--backup_dir', default="_backup", help='Directory to store backups (default: _backup)')
-    parser.add_argument('-p', '--pre_normalize', action='store_true', help='Pre-normalize before downsampling bit-depth')
-    parser.add_argument('-l', '--list', action='store_true', help='List files without converting')
-    parser.add_argument('-n', '--dry_run', action='store_true', help='Log actions without converting')
-    parser.add_argument('-j', '--jobs', type=int, default=1, help='Number of parallel jobs (default: 1)')
-    parser.add_argument('-v', '--verbose', action='store_true', help='Increase verbosity')
-    
+    parser.add_argument("files", nargs="+", help="Files or directories to process")
+    parser.add_argument(
+        "-b", "--bitdepth", type=int, default=16, help="Target bit depth (8, 16, 24)"
+    )
+    parser.add_argument(
+        "-B", "--min_bitdepth", type=int, help="Minimum bit depth (8, 16, 24)"
+    )
+    parser.add_argument(
+        "-c",
+        "--channels",
+        type=int,
+        default=2,
+        help="Target number of channels (1=mono, 2=stereo)",
+    )
+    parser.add_argument(
+        "-r", "--samplerate", type=int, default=44100, help="Target sample rate"
+    )
+    parser.add_argument("-R", "--min_samplerate", type=int, help="Minimum sample rate")
+    parser.add_argument(
+        "-x", "--ext", default="wav", help="File extension to search for (default: wav)"
+    )
+    parser.add_argument(
+        "-a",
+        "--auto_mono",
+        action="store_true",
+        help="Automatically convert stereo samples to mono",
+    )
+    parser.add_argument(
+        "-A",
+        "--auto_mono_threshold",
+        type=float,
+        default=-95.5,
+        help="Auto-mono threshold dB",
+    )
+    parser.add_argument(
+        "-S",
+        "--skip_spectrograms",
+        action="store_true",
+        help="Skip generating spectrogram files",
+    )
+    parser.add_argument(
+        "-d",
+        "--backup_dir",
+        default="_backup",
+        help="Directory to store backups (default: _backup)",
+    )
+    parser.add_argument(
+        "-p",
+        "--pre_normalize",
+        action="store_true",
+        help="Pre-normalize before downsampling bit-depth",
+    )
+    parser.add_argument(
+        "-l", "--list", action="store_true", help="List files without converting"
+    )
+    parser.add_argument(
+        "-n", "--dry_run", action="store_true", help="Log actions without converting"
+    )
+    parser.add_argument(
+        "-j", "--jobs", type=int, default=1, help="Number of parallel jobs (default: 1)"
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Increase verbosity"
+    )
+
     return parser.parse_args()
 
+
 def delete_resource_forks(directory):
     """Recursively find and delete all '._' resource fork files in the directory."""
     for root, dirs, files in os.walk(directory):
@@ -59,26 +111,32 @@ def delete_resource_forks(directory):
                 print(f"Deleting resource fork file: {file_path}")
                 os.remove(file_path)
 
+
 def reencode_audio(file_path):
     """Re-encode audio file to PCM 16-bit if it has a different encoding."""
     try:
         with sf.SoundFile(file_path) as f:
-            print(f"Audio encoding: {f.format}, subtype: {f.subtype}, channels: {f.channels}")
-            if f.subtype != 'PCM_16':
+            print(
+                f"Audio encoding: {f.format}, subtype: {f.subtype}, channels: {f.channels}"
+            )
+            if f.subtype != "PCM_16":
                 # If the file is not PCM 16, re-save it as PCM_16
                 data, samplerate = sf.read(file_path)
-                temp_output = file_path.replace(os.path.splitext(file_path)[1], "_reencoded.wav")
-                sf.write(temp_output, data, samplerate, subtype='PCM_16')
+                temp_output = file_path.replace(
+                    os.path.splitext(file_path)[1], "_reencoded.wav"
+                )
+                sf.write(temp_output, data, samplerate, subtype="PCM_16")
                 print(f"File re-encoded to PCM_16: {file_path} -> {temp_output}")
                 return temp_output
     except Exception as e:
         print(f"Error re-encoding {file_path}: {e}")
     return None
 
+
 def process_audio(file_path, args, dry_run=False):
     """Main function to process audio files based on arguments."""
     try:
-        print(f"Processing file: {file_path}")  # Debug logging to trace progress
+        print(f"Processing file: {file_path}")
         audio = AudioSegment.from_file(file_path)
         modified = False
         change_reason = []
@@ -108,7 +166,9 @@ def process_audio(file_path, args, dry_run=False):
 
         # Check if we need to convert the bit depth
         if audio.sample_width * 8 > args.bitdepth:
-            change_reason.append(f"bit depth {audio.sample_width * 8} -> {args.bitdepth}")
+            change_reason.append(
+                f"bit depth {audio.sample_width * 8} -> {args.bitdepth}"
+            )
             if not dry_run:
                 audio = audio.set_sample_width(args.bitdepth // 8)
             modified = True
@@ -121,7 +181,9 @@ def process_audio(file_path, args, dry_run=False):
             modified = True
         elif args.min_samplerate and audio.frame_rate < args.min_samplerate:
             # Only upsample if the user specifies a minimum sample rate
-            change_reason.append(f"sample rate {audio.frame_rate} -> {args.min_samplerate}")
+            change_reason.append(
+                f"sample rate {audio.frame_rate} -> {args.min_samplerate}"
+            )
             if not dry_run:
                 audio = audio.set_frame_rate(args.min_samplerate)
             modified = True
@@ -131,9 +193,13 @@ def process_audio(file_path, args, dry_run=False):
             if not dry_run:
                 # Backup the original file if required
                 if args.backup_dir != "-":
-                    backup_path = os.path.join(args.backup_dir, os.path.basename(file_path))
+                    # Get the relative path from the current working directory
+                    rel_path = os.path.relpath(file_path)
+                    # Create the backup path maintaining the directory structure
+                    backup_path = os.path.join(args.backup_dir, rel_path)
+                    # Ensure the directory structure exists
                     os.makedirs(os.path.dirname(backup_path), exist_ok=True)
-                    shutil.copy(file_path, backup_path)
+                    shutil.copy2(file_path, backup_path)  # copy2 preserves metadata
 
                 # Export the converted audio file
                 output_file = file_path.replace(os.path.splitext(file_path)[1], ".wav")
@@ -141,7 +207,9 @@ def process_audio(file_path, args, dry_run=False):
 
                 # Generate spectrogram if enabled
                 if not args.skip_spectrograms:
-                    generate_spectrogram(file_path, output_file, args.backup_dir)
+                    generate_spectrogram(
+                        file_path, output_file, os.path.dirname(backup_path)
+                    )
         else:
             print(f"{file_path} [UNCHANGED]")
 
@@ -155,7 +223,10 @@ def process_audio(file_path, args, dry_run=False):
                 # Retry the process with the re-encoded file
                 process_audio(reencoded_file, args, dry_run)
             except Exception as retry_error:
-                print(f"Failed to process the re-encoded file {reencoded_file}: {retry_error}")
+                print(
+                    f"Failed to process the re-encoded file {reencoded_file}: {retry_error}"
+                )
+
 
 def check_effectively_mono(audio, threshold_dB):
     """Check if a stereo file is effectively mono."""
@@ -166,6 +237,7 @@ def check_effectively_mono(audio, threshold_dB):
     peak_diff_db = difference.max_dBFS
     return peak_diff_db < threshold_dB
 
+
 def generate_spectrogram(original_file, new_file, backup_dir):
     """Generate and save spectrograms for the original and new files."""
     y_old, sr_old = librosa.load(original_file, sr=None)
@@ -174,28 +246,35 @@ def generate_spectrogram(original_file, new_file, backup_dir):
     # Spectrogram for original file
     plt.figure(figsize=(10, 4))
     D_old = librosa.amplitude_to_db(np.abs(librosa.stft(y_old)), ref=np.max)
-    librosa.display.specshow(D_old, sr=sr_old, x_axis='time', y_axis='log')
-    plt.colorbar(format='%+2.0f dB')
-    plt.title(f'Spectrogram of {original_file}')
-    old_spectrogram_path = os.path.join(backup_dir, os.path.basename(original_file) + ".old.png")
+    librosa.display.specshow(D_old, sr=sr_old, x_axis="time", y_axis="log")
+    plt.colorbar(format="%+2.0f dB")
+    plt.title(f"Spectrogram of {os.path.basename(original_file)}")
+    old_spectrogram_path = os.path.join(
+        backup_dir, os.path.basename(original_file) + ".old.png"
+    )
+    os.makedirs(backup_dir, exist_ok=True)  # Ensure the directory exists
     plt.savefig(old_spectrogram_path)
     plt.close()
 
     # Spectrogram for new file
     plt.figure(figsize=(10, 4))
     D_new = librosa.amplitude_to_db(np.abs(librosa.stft(y_new)), ref=np.max)
-    librosa.display.specshow(D_new, sr=sr_new, x_axis='time', y_axis='log')
-    plt.colorbar(format='%+2.0f dB')
-    plt.title(f'Spectrogram of {new_file}')
-    new_spectrogram_path = os.path.join(backup_dir, os.path.basename(new_file) + ".new.png")
+    librosa.display.specshow(D_new, sr=sr_new, x_axis="time", y_axis="log")
+    plt.colorbar(format="%+2.0f dB")
+    plt.title(f"Spectrogram of {os.path.basename(new_file)}")
+    new_spectrogram_path = os.path.join(
+        backup_dir, os.path.basename(new_file) + ".new.png"
+    )
     plt.savefig(new_spectrogram_path)
     plt.close()
 
+
 def list_files(args, file_list):
     """Prints file summary and actions without performing them."""
     for file_path in file_list:
         print(f"Previewing: {file_path}")
 
+
 def collect_files(args):
     """Collect all files from provided directories and files, skipping resource fork files."""
     file_list = []
@@ -210,14 +289,19 @@ def collect_files(args):
                 file_list.append(path)
     return file_list
 
+
 def run_in_parallel(file_list, args):
     """Run the audio processing in parallel."""
     try:
         with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-            futures = {executor.submit(process_audio, file, args): file for file in file_list}
+            futures = {
+                executor.submit(process_audio, file, args): file for file in file_list
+            }
             for future in concurrent.futures.as_completed(futures):
                 try:
-                    result = future.result()  # Get the result of the future (processed file)
+                    result = (
+                        future.result()
+                    )  # Get the result of the future (processed file)
                 except Exception as exc:
                     file = futures[future]
                     print(f"File {file} generated an exception: {exc}")
@@ -226,6 +310,7 @@ def run_in_parallel(file_list, args):
         executor.shutdown(wait=False, cancel_futures=True)
         raise
 
+
 def main():
     args = parse_args()
 
@@ -249,5 +334,6 @@ def main():
     else:
         run_in_parallel(file_list, args)
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()

From afdd47b23103d40294b806b3b5e58867bd09ad8e Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:12:55 +0000
Subject: [PATCH 03/66] Nov 17, 2024, 4:12 PM

---
 sample-shrinker-python/sample-shrinker.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 3e66d66..0bfc0d2 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -53,7 +53,10 @@ def parse_args():
     )
     parser.add_argument("-R", "--min_samplerate", type=int, help="Minimum sample rate")
     parser.add_argument(
-        "-x", "--ext", default="wav", help="File extension to search for (default: wav)"
+        "-x",
+        "--ext",
+        default="wav,mp3",
+        help="Comma-separated file extensions to search for (default: wav,mp3)",
     )
     parser.add_argument(
         "-a",
@@ -276,16 +279,26 @@ def list_files(args, file_list):
 
 
 def collect_files(args):
-    """Collect all files from provided directories and files, skipping resource fork files."""
+    """Collect all wav and mp3 files from provided directories and files."""
     file_list = []
+    # Split extensions string into a list and clean up whitespace
+    valid_extensions = [ext.strip().lower() for ext in args.ext.split(",")]
+
     for path in args.files:
         if os.path.isdir(path):
             for root, dirs, files in os.walk(path):
                 for file in files:
-                    if file.endswith(f".{args.ext}") and not file.startswith("._"):
+                    file_lower = file.lower()
+                    # Check if file ends with any of the valid extensions
+                    if any(
+                        file_lower.endswith(f".{ext}") for ext in valid_extensions
+                    ) and not file.startswith("._"):
                         file_list.append(os.path.join(root, file))
         elif os.path.isfile(path):
-            if not os.path.basename(path).startswith("._"):
+            file_lower = path.lower()
+            if any(
+                file_lower.endswith(f".{ext}") for ext in valid_extensions
+            ) and not os.path.basename(path).startswith("._"):
                 file_list.append(path)
     return file_list
 

From 91a9605256d59fac411268454d941e0646a3f5c2 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:17:48 +0000
Subject: [PATCH 04/66] Nov 17, 2024, 4:17 PM

---
 sample-shrinker-python/sample-shrinker.py | 115 +++++++++++++++++++---
 1 file changed, 101 insertions(+), 14 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 0bfc0d2..ba56239 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -2,11 +2,15 @@
 import concurrent.futures
 import os
 import shutil
+import time
+from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
 
 import librosa
 import matplotlib.pyplot as plt
 import numpy as np
+import questionary
 import soundfile as sf
 from pydub import AudioSegment
 
@@ -324,6 +328,61 @@ def run_in_parallel(file_list, args):
         raise
 
 
+def find_duplicate_directories(paths):
+    """Find directories with matching names and file counts."""
+    dir_map = defaultdict(list)
+
+    for path in paths:
+        path = Path(path)
+        if path.is_dir():
+            for dir_path in path.rglob("*"):
+                if dir_path.is_dir():
+                    # Get directory name, file count, and total size
+                    dir_name = dir_path.name.lower()  # Case-insensitive comparison
+                    files = list(dir_path.glob("*"))
+                    file_count = len([f for f in files if f.is_file()])
+                    total_size = sum(f.stat().st_size for f in files if f.is_file())
+
+                    dir_map[(dir_name, file_count, total_size)].append(dir_path)
+
+    # Return only directories that have duplicates
+    return {k: v for k, v in dir_map.items() if len(v) > 1}
+
+
+def process_duplicate_directories(duplicates, args):
+    """Process duplicate directories, keeping the oldest copy."""
+    for (dir_name, file_count, total_size), paths in duplicates.items():
+        print(
+            f"\nFound duplicate directories named '{dir_name}' with {file_count} files ({total_size} bytes):"
+        )
+
+        # Sort paths by creation time
+        paths_with_time = [(p, p.stat().st_ctime) for p in paths]
+        paths_with_time.sort(key=lambda x: x[1])
+
+        # Keep the oldest directory
+        original_dir = paths_with_time[0][0]
+        print(
+            f"Keeping oldest copy: {original_dir} (created: {time.ctime(paths_with_time[0][1])})"
+        )
+
+        # Process newer copies
+        for dir_path, ctime in paths_with_time[1:]:
+            print(f"Moving duplicate: {dir_path} (created: {time.ctime(ctime)})")
+            if not args.dry_run:
+                # Create backup path
+                rel_path = dir_path.relative_to(dir_path.parent.parent)
+                backup_path = Path(args.backup_dir) / rel_path
+
+                # Ensure backup directory exists
+                backup_path.parent.mkdir(parents=True, exist_ok=True)
+
+                try:
+                    shutil.move(str(dir_path), str(backup_path))
+                except Exception as e:
+                    print(f"Error moving directory {dir_path}: {e}")
+
+
 def main():
     args = parse_args()
 
@@ -332,20 +391,48 @@ def main():
         print(usage_intro())
         return
 
-    # Delete all '._' files before processing anything
-    for path in args.files:
-        if os.path.isdir(path):
-            delete_resource_forks(path)
-
-    # Collect the files to process
-    file_list = collect_files(args)
-
-    if args.dry_run or args.list:
-        list_files(args, file_list)
-        for file in file_list:
-            process_audio(file, args, dry_run=True)
-    else:
-        run_in_parallel(file_list, args)
+    # Ask user what they want to do
+    action = questionary.select(
+        "What would you like to do?",
+        choices=[
+            "Shrink samples (convert audio files)",
+            "Remove duplicate directories",
+            "Exit",
+        ],
+    ).ask()
+
+    if action == "Exit":
+        return
+    elif action == "Remove duplicate directories":
+        # Find and process duplicate directories
+        print("\nSearching for duplicate directories...")
+        duplicates = find_duplicate_directories(args.files)
+
+        if not duplicates:
+            print("No duplicate directories found.")
+            return
+
+        if args.dry_run:
+            print("\nDRY RUN - No files will be moved")
+
+        process_duplicate_directories(duplicates, args)
+        print("\nDuplicate removal complete!")
+
+    else:  # Shrink samples
+        # Delete all '._' files before processing anything
+        for path in args.files:
+            if os.path.isdir(path):
+                delete_resource_forks(path)
+
+        # Collect the files to process
+        file_list = collect_files(args)
+
+        if args.dry_run or args.list:
+            list_files(args, file_list)
+            for file in file_list:
+                process_audio(file, args, dry_run=True)
+        else:
+            run_in_parallel(file_list, args)
 
 
 if __name__ == "__main__":

From 1b57e195ee567ae300caf59e030c1cc9c6d8651c Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:22:08 +0000
Subject: [PATCH 05/66] Nov 17, 2024, 4:22 PM

---
 sample-shrinker-python/sample-shrinker.py | 122 ++++++++++++++++++++--
 1 file changed, 111 insertions(+), 11 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index ba56239..c41e4ca 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -383,15 +383,10 @@ def process_duplicate_directories(duplicates, args):
                     print(f"Error moving directory {dir_path}: {e}")
 
 
-def main():
-    args = parse_args()
-
-    # Ensure that at least one file or directory is provided
-    if not args.files:
-        print(usage_intro())
-        return
-
-    # Ask user what they want to do
+def get_interactive_config():
+    """Get configuration through interactive questionary prompts."""
+    
+    # First, get the action type
     action = questionary.select(
         "What would you like to do?",
         choices=[
@@ -402,8 +397,113 @@ def main():
     ).ask()
 
     if action == "Exit":
+        return None, None
+
+    # Get the directory/files to process
+    paths = questionary.path(
+        "Select directory or file to process:",
+        only_directories=False,
+        multiple=True
+    ).ask()
+
+    if not paths:
+        return None, None
+
+    # Create a namespace object to match argparse structure
+    args = argparse.Namespace()
+    args.files = paths.split(",") if isinstance(paths, str) else paths
+    
+    # Set defaults
+    args.backup_dir = "_backup"
+    args.dry_run = False
+    args.skip_spectrograms = False
+    args.jobs = 1
+    args.verbose = False
+    args.ext = "wav,mp3"
+
+    if action == "Remove duplicate directories":
+        # For duplicate removal, we only need a few additional options
+        args.dry_run = questionary.confirm(
+            "Would you like to do a dry run first (preview without making changes)?",
+            default=True
+        ).ask()
+        
+        return "duplicates", args
+
+    # For sample shrinking, get all the conversion options
+    args.bitdepth = questionary.select(
+        "Select target bit depth:",
+        choices=["8", "16", "24"],
+        default="16"
+    ).ask()
+    args.bitdepth = int(args.bitdepth)
+
+    args.channels = questionary.select(
+        "Select target channels:",
+        choices=[
+            "1 (mono)",
+            "2 (stereo)"
+        ],
+        default="2 (stereo)"
+    ).ask()
+    args.channels = 1 if "1" in args.channels else 2
+
+    args.samplerate = questionary.select(
+        "Select target sample rate:",
+        choices=["22050", "44100", "48000"],
+        default="44100"
+    ).ask()
+    args.samplerate = int(args.samplerate)
+
+    # Advanced options in a checkbox group
+    advanced_options = questionary.checkbox(
+        "Select additional options:",
+        choices=[
+            "Auto-convert stereo to mono when possible",
+            "Pre-normalize before conversion",
+            "Skip generating spectrograms",
+            "Preview changes (dry run)",
+            "Process files in parallel"
+        ]
+    ).ask()
+
+    args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
+    args.pre_normalize = "Pre-normalize before conversion" in advanced_options
+    args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
+    args.dry_run = "Preview changes (dry run)" in advanced_options
+    
+    if "Process files in parallel" in advanced_options:
+        args.jobs = questionary.select(
+            "How many parallel jobs?",
+            choices=["2", "4", "8", "16"],
+            default="4"
+        ).ask()
+        args.jobs = int(args.jobs)
+
+    if args.auto_mono:
+        args.auto_mono_threshold = float(
+            questionary.text(
+                "Auto-mono threshold in dB (default: -95.5):",
+                default="-95.5"
+            ).ask()
+        )
+
+    return "shrink", args
+
+
+def main():
+    # Check if command line arguments were provided
+    if len(sys.argv) > 1:
+        args = parse_args()
+        action = "shrink"  # Default to shrink mode for command line
+    else:
+        # Use interactive mode
+        action, args = get_interactive_config()
+        
+    if not args:
         return
-    elif action == "Remove duplicate directories":
+
+    if action == "duplicates":
         # Find and process duplicate directories
         print("\nSearching for duplicate directories...")
         duplicates = find_duplicate_directories(args.files)
@@ -427,7 +527,7 @@ def main():
         # Collect the files to process
         file_list = collect_files(args)
 
-        if args.dry_run or args.list:
+        if args.dry_run:
             list_files(args, file_list)
             for file in file_list:
                 process_audio(file, args, dry_run=True)

From 78c45d6dc107a1180f1d2ea2c55566c215b21b2e Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:24:50 +0000
Subject: [PATCH 06/66] Nov 17, 2024, 4:24 PM

---
 sample-shrinker-python/sample-shrinker.py | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index c41e4ca..a98357c 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -504,20 +504,7 @@ def main():
         return
 
     if action == "duplicates":
-        # Find and process duplicate directories
-        print("\nSearching for duplicate directories...")
-        duplicates = find_duplicate_directories(args.files)
-
-        if not duplicates:
-            print("No duplicate directories found.")
-            return
-
-        if args.dry_run:
-            print("\nDRY RUN - No files will be moved")
-
-        process_duplicate_directories(duplicates, args)
-        print("\nDuplicate removal complete!")
-
+        process_duplicates(args)
     else:  # Shrink samples
         # Delete all '._' files before processing anything
         for path in args.files:

From bb3cea8a46e5f894d82c909c347bd17f37638bcf Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:26:46 +0000
Subject: [PATCH 07/66] Nov 17, 2024, 4:26 PM

---
 sample-shrinker-python/sample-shrinker.py | 69 +++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index a98357c..dd915dd 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -6,6 +6,8 @@
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
+import hashlib
+import filecmp
 
 import librosa
 import matplotlib.pyplot as plt
@@ -491,6 +493,73 @@ def get_interactive_config():
     return "shrink", args
 
 
+def process_duplicates(args):
+    """Process both directory and file level duplicates with safety checks."""
+    print("\nPhase 1: Searching for duplicate directories...")
+    dir_duplicates = find_duplicate_directories(args.files)
+    
+    if dir_duplicates:
+        print(f"\nFound {sum(len(v) - 1 for v in dir_duplicates.values())} duplicate directories")
+        
+        # Safety check: Verify directory contents match exactly
+        verified_duplicates = {}
+        for key, paths in dir_duplicates.items():
+            dir_name, file_count, total_size = key
+            
+            # Get file listing for each directory
+            dir_contents = defaultdict(list)
+            for path in paths:
+                files = sorted(f.relative_to(path) for f in path.rglob("*") if f.is_file())
+                content_hash = hashlib.sha256(str(files).encode()).hexdigest()
+                dir_contents[content_hash].append(path)
+            
+            # Only keep directories with exactly matching contents
+            for content_hash, matching_paths in dir_contents.items():
+                if len(matching_paths) > 1:
+                    verified_duplicates[key + (content_hash,)] = matching_paths
+        
+        if args.dry_run:
+            print("\nDRY RUN - No directories will be moved")
+        process_duplicate_directories(verified_duplicates, args)
+    else:
+        print("No duplicate directories found.")
+    
+    print("\nPhase 2: Searching for duplicate files...")
+    file_duplicates = find_duplicate_files(args.files)
+    
+    if file_duplicates:
+        total_duplicates = sum(len(group) - 1 for group in file_duplicates)
+        print(f"\nFound {total_duplicates} duplicate files")
+        
+        # Additional safety checks for file processing
+        safe_duplicates = []
+        for group in file_duplicates:
+            # Verify files are not symbolic links
+            real_files = [f for f in group if not f.is_symlink()]
+            
+            # Check if files are in use (on Windows) or locked
+            available_files = []
+            for file in real_files:
+                try:
+                    with open(file, 'rb') as f:
+                        # Try to get a shared lock
+                        pass
+                    available_files.append(file)
+                except (IOError, OSError):
+                    print(f"Warning: File {file} appears to be in use, skipping")
+            
+            if len(available_files) > 1:
+                safe_duplicates.append(available_files)
+        
+        if args.dry_run:
+            print("\nDRY RUN - No files will be moved")
+        process_duplicate_files(safe_duplicates, args)
+    else:
+        print("No duplicate files found.")
+    
+    print("\nDuplicate removal complete!")
+
+
 def main():
     # Check if command line arguments were provided
     if len(sys.argv) > 1:

From c9f2111179fd36efd3ab63f73a4113e612eef64f Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:28:19 +0000
Subject: [PATCH 08/66] Nov 17, 2024, 4:28 PM

---
 sample-shrinker-python/sample-shrinker.py | 139 +++++++++++++++++++++-
 1 file changed, 137 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index dd915dd..82ca17b 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 import hashlib
 import filecmp
+import ssdeep  # Add to imports
 
 import librosa
 import matplotlib.pyplot as plt
@@ -330,6 +331,140 @@ def run_in_parallel(file_list, args):
         raise
 
 
+def get_file_hash(file_path, fuzzy=False, chunk_size=1024*1024):
+    """Calculate file hash using either SHA-256 or fuzzy hashing."""
+    if fuzzy:
+        try:
+            # Generate fuzzy hash for the file
+            return ssdeep.hash_from_file(str(file_path))
+        except Exception as e:
+            print(f"Error generating fuzzy hash for {file_path}: {e}")
+            return None
+    else:
+        # Standard SHA-256 hash with quick check
+        sha256_hash = hashlib.sha256()
+        file_size = os.path.getsize(file_path)
+        
+        with open(file_path, "rb") as f:
+            # Read first chunk
+            first_chunk = f.read(chunk_size)
+            sha256_hash.update(first_chunk)
+            
+            # If file is large enough, read last chunk
+            if file_size > chunk_size * 2:
+                f.seek(-chunk_size, 2)
+                last_chunk = f.read(chunk_size)
+                sha256_hash.update(last_chunk)
+                
+            return sha256_hash.hexdigest()
+
+def is_audio_file(file_path):
+    """Check if file is an audio file we want to process."""
+    return file_path.lower().endswith(('.wav', '.mp3'))
+
+def find_duplicate_files(paths, fuzzy_threshold=90):
+    """Find duplicate files using a multi-stage approach with optional fuzzy matching."""
+    # Stage 1: Group by size (fast)
+    size_groups = defaultdict(list)
+    
+    for path in paths:
+        path = Path(path)
+        if path.is_dir():
+            for file_path in path.rglob("*"):
+                if file_path.is_file() and is_audio_file(str(file_path)):
+                    size = file_path.stat().st_size
+                    size_groups[size].append(file_path)
+    
+    # Stage 2: For same-size files, group by quick hash
+    hash_groups = defaultdict(list)
+    fuzzy_groups = []  # Store groups of similar files
+    
+    for size, file_paths in size_groups.items():
+        if len(file_paths) > 1:  # Only process groups with potential duplicates
+            # First, try exact matches
+            for file_path in file_paths:
+                try:
+                    file_hash = get_file_hash(file_path, fuzzy=False)
+                    hash_groups[file_hash].append(file_path)
+                except Exception as e:
+                    print(f"Error hashing file {file_path}: {e}")
+            
+            # Then, try fuzzy matching for files that weren't exact matches
+            unmatched_files = [f for f in file_paths if not any(f in group for group in hash_groups.values() if len(group) > 1)]
+            if len(unmatched_files) > 1:
+                fuzzy_matches = defaultdict(list)
+                for file_path in unmatched_files:
+                    fuzzy_hash = get_file_hash(file_path, fuzzy=True)
+                    if fuzzy_hash:
+                        fuzzy_matches[file_path] = fuzzy_hash
+                
+                # Compare fuzzy hashes
+                matched = set()
+                for file1, hash1 in fuzzy_matches.items():
+                    if file1 in matched:
+                        continue
+                    similar_files = [file1]
+                    for file2, hash2 in fuzzy_matches.items():
+                        if file2 != file1 and file2 not in matched:
+                            similarity = ssdeep.compare(hash1, hash2)
+                            if similarity >= fuzzy_threshold:
+                                similar_files.append(file2)
+                                matched.add(file2)
+                    if len(similar_files) > 1:
+                        fuzzy_groups.append(similar_files)
+                        matched.add(file1)
+    
+    # Combine exact and fuzzy matches
+    duplicates = [group for group in hash_groups.values() if len(group) > 1]
+    duplicates.extend(fuzzy_groups)
+    
+    return duplicates, fuzzy_groups
+
+def process_duplicate_files(duplicates, fuzzy_groups, args):
+    """Process duplicate files with enhanced reporting."""
+    for group in duplicates:
+        is_fuzzy = group in fuzzy_groups
+        match_type = "similar" if is_fuzzy else "identical"
+        
+        # Get file size for reporting
+        file_size = group[0].stat().st_size
+        print(f"\nFound {match_type} files: '{group[0].name}' ({file_size} bytes)")
+        
+        if is_fuzzy:
+            # For fuzzy matches, show similarity percentages
+            base_hash = get_file_hash(group[0], fuzzy=True)
+            print("Similarity scores:")
+            for file in group[1:]:
+                file_hash = get_file_hash(file, fuzzy=True)
+                similarity = ssdeep.compare(base_hash, file_hash)
+                print(f"  {file.name}: {similarity}% similar")
+        
+        # Sort files by creation time
+        files_with_time = [(f, f.stat().st_ctime) for f in group]
+        files_with_time.sort(key=lambda x: x[1])
+        
+        # Keep the oldest file
+        original_file = files_with_time[0][0]
+        print(f"Keeping oldest copy: {original_file} (created: {time.ctime(files_with_time[0][1])})")
+        
+        # Process newer copies
+        for file_path, ctime in files_with_time[1:]:
+            print(f"Moving {match_type} file: {file_path} (created: {time.ctime(ctime)})")
+            if not args.dry_run:
+                try:
+                    # Create backup path maintaining directory structure
+                    rel_path = file_path.relative_to(file_path.parent.parent)
+                    backup_path = Path(args.backup_dir) / rel_path
+                    
+                    # Ensure backup directory exists
+                    backup_path.parent.mkdir(parents=True, exist_ok=True)
+                    
+                    # Move the file
+                    shutil.move(str(file_path), str(backup_path))
+                except Exception as e:
+                    print(f"Error moving file {file_path}: {e}")
+
+
 def find_duplicate_directories(paths):
     """Find directories with matching names and file counts."""
     dir_map = defaultdict(list)
@@ -525,7 +660,7 @@ def process_duplicates(args):
         print("No duplicate directories found.")
     
     print("\nPhase 2: Searching for duplicate files...")
-    file_duplicates = find_duplicate_files(args.files)
+    file_duplicates, fuzzy_groups = find_duplicate_files(args.files)
     
     if file_duplicates:
         total_duplicates = sum(len(group) - 1 for group in file_duplicates)
@@ -553,7 +688,7 @@ def process_duplicates(args):
         
         if args.dry_run:
             print("\nDRY RUN - No files will be moved")
-        process_duplicate_files(safe_duplicates, args)
+        process_duplicate_files(safe_duplicates, fuzzy_groups, args)
     else:
         print("No duplicate files found.")
     

From 3936bc8ecec62c1e75f968ec38f98d1863d39fb9 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:29:44 +0000
Subject: [PATCH 09/66] Nov 17, 2024, 4:29 PM

---
 sample-shrinker-python/sample-shrinker.py | 155 ++++++++++++++++------
 1 file changed, 114 insertions(+), 41 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 82ca17b..4386eff 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -362,9 +362,9 @@ def is_audio_file(file_path):
     """Check if file is an audio file we want to process."""
     return file_path.lower().endswith(('.wav', '.mp3'))
 
-def find_duplicate_files(paths, fuzzy_threshold=90):
+def find_duplicate_files(paths, args):
     """Find duplicate files using a multi-stage approach with optional fuzzy matching."""
-    # Stage 1: Group by size (fast)
+    print("Scanning for duplicate files...")
     size_groups = defaultdict(list)
     
     for path in paths:
@@ -372,51 +372,76 @@ def find_duplicate_files(paths, fuzzy_threshold=90):
         if path.is_dir():
             for file_path in path.rglob("*"):
                 if file_path.is_file() and is_audio_file(str(file_path)):
+                    if args.verbose:
+                        print(f"Scanning: {file_path}")
                     size = file_path.stat().st_size
                     size_groups[size].append(file_path)
     
-    # Stage 2: For same-size files, group by quick hash
     hash_groups = defaultdict(list)
-    fuzzy_groups = []  # Store groups of similar files
+    fuzzy_groups = []
     
     for size, file_paths in size_groups.items():
-        if len(file_paths) > 1:  # Only process groups with potential duplicates
-            # First, try exact matches
+        if len(file_paths) > 1:
+            if args.verbose:
+                print(f"\nChecking {len(file_paths)} files of size {size} bytes...")
+            
+            # First pass: exact matches
             for file_path in file_paths:
                 try:
                     file_hash = get_file_hash(file_path, fuzzy=False)
-                    hash_groups[file_hash].append(file_path)
+                    if args.ignore_names:
+                        # Use only the hash for grouping if ignoring names
+                        hash_groups[file_hash].append(file_path)
+                    else:
+                        # Include name in grouping key
+                        name_key = file_path.stem.lower()
+                        hash_groups[(name_key, file_hash)].append(file_path)
                 except Exception as e:
                     print(f"Error hashing file {file_path}: {e}")
             
-            # Then, try fuzzy matching for files that weren't exact matches
-            unmatched_files = [f for f in file_paths if not any(f in group for group in hash_groups.values() if len(group) > 1)]
-            if len(unmatched_files) > 1:
-                fuzzy_matches = defaultdict(list)
-                for file_path in unmatched_files:
-                    fuzzy_hash = get_file_hash(file_path, fuzzy=True)
-                    if fuzzy_hash:
-                        fuzzy_matches[file_path] = fuzzy_hash
-                
-                # Compare fuzzy hashes
-                matched = set()
-                for file1, hash1 in fuzzy_matches.items():
-                    if file1 in matched:
-                        continue
-                    similar_files = [file1]
-                    for file2, hash2 in fuzzy_matches.items():
-                        if file2 != file1 and file2 not in matched:
-                            similarity = ssdeep.compare(hash1, hash2)
-                            if similarity >= fuzzy_threshold:
-                                similar_files.append(file2)
-                                matched.add(file2)
-                    if len(similar_files) > 1:
-                        fuzzy_groups.append(similar_files)
-                        matched.add(file1)
+            # Second pass: fuzzy matching if enabled
+            if args.use_fuzzy:
+                unmatched = [f for f in file_paths if not any(f in g for g in hash_groups.values() if len(g) > 1)]
+                if len(unmatched) > 1:
+                    fuzzy_matches = defaultdict(list)
+                    
+                    for file_path in unmatched:
+                        try:
+                            audio = AudioSegment.from_file(str(file_path))
+                            fuzzy_key = []
+                            
+                            if "Compare file lengths" in args.fuzzy_options:
+                                fuzzy_key.append(len(audio))
+                            if "Compare sample rates" in args.fuzzy_options:
+                                fuzzy_key.append(audio.frame_rate)
+                            if "Compare channel counts" in args.fuzzy_options:
+                                fuzzy_key.append(audio.channels)
+                            
+                            fuzzy_hash = get_file_hash(file_path, fuzzy=True)
+                            if fuzzy_hash:
+                                fuzzy_matches[(tuple(fuzzy_key), fuzzy_hash)].append(file_path)
+                        except Exception as e:
+                            print(f"Error analyzing {file_path}: {e}")
+                    
+                    # Compare fuzzy matches
+                    for key, matches in fuzzy_matches.items():
+                        if len(matches) > 1:
+                            base_hash = get_file_hash(matches[0], fuzzy=True)
+                            similar_files = [matches[0]]
+                            
+                            for other_file in matches[1:]:
+                                other_hash = get_file_hash(other_file, fuzzy=True)
+                                similarity = ssdeep.compare(base_hash, other_hash)
+                                if similarity >= args.fuzzy_threshold:
+                                    similar_files.append(other_file)
+                            
+                            if len(similar_files) > 1:
+                                fuzzy_groups.append(similar_files)
     
-    # Combine exact and fuzzy matches
+    # Combine results based on exact and fuzzy matches
     duplicates = [group for group in hash_groups.values() if len(group) > 1]
-    duplicates.extend(fuzzy_groups)
+    if args.use_fuzzy:
+        duplicates.extend(fuzzy_groups)
     
     return duplicates, fuzzy_groups
 
@@ -553,18 +578,66 @@ def get_interactive_config():
     # Set defaults
     args.backup_dir = "_backup"
     args.dry_run = False
-    args.skip_spectrograms = False
-    args.jobs = 1
     args.verbose = False
     args.ext = "wav,mp3"
 
     if action == "Remove duplicate directories":
-        # For duplicate removal, we only need a few additional options
-        args.dry_run = questionary.confirm(
-            "Would you like to do a dry run first (preview without making changes)?",
-            default=True
+        # For duplicate removal, get configuration options
+        duplicate_options = questionary.checkbox(
+            "Select duplicate removal options:",
+            choices=[
+                "Use fuzzy matching for similar files",
+                "Ignore filenames (match by content only)",
+                "Preview changes (dry run)",
+                "Show detailed progress",
+            ],
+            default=["Preview changes (dry run)"]
         ).ask()
-        
+
+        args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
+        args.ignore_names = "Ignore filenames (match by content only)" in duplicate_options
+        args.dry_run = "Preview changes (dry run)" in duplicate_options
+        args.verbose = "Show detailed progress" in duplicate_options
+
+        if args.use_fuzzy:
+            # Get fuzzy matching configuration
+            args.fuzzy_threshold = questionary.select(
+                "Select fuzzy matching threshold (higher = more strict):",
+                choices=[
+                    "95 - Nearly identical",
+                    "90 - Very similar",
+                    "85 - Similar",
+                    "80 - Somewhat similar"
+                ],
+                default="90 - Very similar"
+            ).ask()
+            args.fuzzy_threshold = int(args.fuzzy_threshold.split()[0])
+
+            args.fuzzy_options = questionary.checkbox(
+                "Select fuzzy matching options:",
+                choices=[
+                    "Compare file lengths",
+                    "Compare sample rates",
+                    "Compare channel counts",
+                ],
+                default=["Compare file lengths", "Compare sample rates"]
+            ).ask()
+
+        # Get backup options
+        backup_choice = questionary.select(
+            "How should duplicates be handled?",
+            choices=[
+                "Move to backup directory (safe)",
+                "Delete immediately (dangerous)",
+                "Preview only (no changes)"
+            ],
+            default="Move to backup directory (safe)"
+        ).ask()
+
+        args.backup_dir = "_backup" if "Move" in backup_choice else None
+        args.delete_duplicates = "Delete" in backup_choice
+        args.dry_run = "Preview" in backup_choice
+
         return "duplicates", args
 
     # For sample shrinking, get all the conversion options
@@ -660,7 +733,7 @@ def process_duplicates(args):
         print("No duplicate directories found.")
     
     print("\nPhase 2: Searching for duplicate files...")
-    file_duplicates, fuzzy_groups = find_duplicate_files(args.files)
+    file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
     
     if file_duplicates:
         total_duplicates = sum(len(group) - 1 for group in file_duplicates)

From 6a0740f90c5b4e57a6e8fd51cbff3443faa4a115 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:30:43 +0000
Subject: [PATCH 10/66] Nov 17, 2024, 4:30 PM

---
 sample-shrinker-python/sample-shrinker.py | 163 ++++++++++++----------
 1 file changed, 86 insertions(+), 77 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 4386eff..ff0e2fe 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1,20 +1,20 @@
 import argparse
 import concurrent.futures
+import filecmp
+import hashlib
 import os
 import shutil
 import time
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
-import hashlib
-import filecmp
-import ssdeep  # Add to imports
 
 import librosa
 import matplotlib.pyplot as plt
 import numpy as np
 import questionary
 import soundfile as sf
+import ssdeep  # Add to imports
 from pydub import AudioSegment
 
 
@@ -331,7 +331,7 @@ def run_in_parallel(file_list, args):
         raise
 
 
-def get_file_hash(file_path, fuzzy=False, chunk_size=1024*1024):
+def get_file_hash(file_path, fuzzy=False, chunk_size=1024 * 1024):
     """Calculate file hash using either SHA-256 or fuzzy hashing."""
     if fuzzy:
         try:
@@ -344,29 +344,31 @@ def get_file_hash(file_path, fuzzy=False, chunk_size=1024*1024):
         # Standard SHA-256 hash with quick check
         sha256_hash = hashlib.sha256()
         file_size = os.path.getsize(file_path)
-        
+
         with open(file_path, "rb") as f:
             # Read first chunk
             first_chunk = f.read(chunk_size)
             sha256_hash.update(first_chunk)
-            
+
             # If file is large enough, read last chunk
             if file_size > chunk_size * 2:
                 f.seek(-chunk_size, 2)
                 last_chunk = f.read(chunk_size)
                 sha256_hash.update(last_chunk)
-                
+
             return sha256_hash.hexdigest()
 
+
 def is_audio_file(file_path):
     """Check if file is an audio file we want to process."""
-    return file_path.lower().endswith(('.wav', '.mp3'))
+    return file_path.lower().endswith((".wav", ".mp3"))
+
 
 def find_duplicate_files(paths, args):
     """Find duplicate files using a multi-stage approach with optional fuzzy matching."""
     print("Scanning for duplicate files...")
     size_groups = defaultdict(list)
-    
+
     for path in paths:
         path = Path(path)
         if path.is_dir():
@@ -376,15 +378,15 @@ def find_duplicate_files(paths, args):
                         print(f"Scanning: {file_path}")
                     size = file_path.stat().st_size
                     size_groups[size].append(file_path)
-    
+
     hash_groups = defaultdict(list)
     fuzzy_groups = []
-    
+
     for size, file_paths in size_groups.items():
         if len(file_paths) > 1:
             if args.verbose:
                 print(f"\nChecking {len(file_paths)} files of size {size} bytes...")
-            
+
             # First pass: exact matches
             for file_path in file_paths:
                 try:
@@ -398,63 +400,70 @@ def find_duplicate_files(paths, args):
                         hash_groups[(name_key, file_hash)].append(file_path)
                 except Exception as e:
                     print(f"Error hashing file {file_path}: {e}")
-            
+
             # Second pass: fuzzy matching if enabled
             if args.use_fuzzy:
-                unmatched = [f for f in file_paths if not any(f in g for g in hash_groups.values() if len(g) > 1)]
+                unmatched = [
+                    f
+                    for f in file_paths
+                    if not any(f in g for g in hash_groups.values() if len(g) > 1)
+                ]
                 if len(unmatched) > 1:
                     fuzzy_matches = defaultdict(list)
-                    
+
                     for file_path in unmatched:
                         try:
                             audio = AudioSegment.from_file(str(file_path))
                             fuzzy_key = []
-                            
+
                             if "Compare file lengths" in args.fuzzy_options:
                                 fuzzy_key.append(len(audio))
                             if "Compare sample rates" in args.fuzzy_options:
                                 fuzzy_key.append(audio.frame_rate)
                             if "Compare channel counts" in args.fuzzy_options:
                                 fuzzy_key.append(audio.channels)
-                            
+
                             fuzzy_hash = get_file_hash(file_path, fuzzy=True)
                             if fuzzy_hash:
-                                fuzzy_matches[(tuple(fuzzy_key), fuzzy_hash)].append(file_path)
+                                fuzzy_matches[(tuple(fuzzy_key), fuzzy_hash)].append(
+                                    file_path
+                                )
                         except Exception as e:
                             print(f"Error analyzing {file_path}: {e}")
-                    
+
                     # Compare fuzzy matches
                     for key, matches in fuzzy_matches.items():
                         if len(matches) > 1:
                             base_hash = get_file_hash(matches[0], fuzzy=True)
                             similar_files = [matches[0]]
-                            
+
                             for other_file in matches[1:]:
                                 other_hash = get_file_hash(other_file, fuzzy=True)
                                 similarity = ssdeep.compare(base_hash, other_hash)
                                 if similarity >= args.fuzzy_threshold:
                                     similar_files.append(other_file)
-                            
+
                             if len(similar_files) > 1:
                                 fuzzy_groups.append(similar_files)
-    
+
     # Combine results based on exact and fuzzy matches
     duplicates = [group for group in hash_groups.values() if len(group) > 1]
     if args.use_fuzzy:
         duplicates.extend(fuzzy_groups)
-    
+
     return duplicates, fuzzy_groups
 
+
 def process_duplicate_files(duplicates, fuzzy_groups, args):
     """Process duplicate files with enhanced reporting."""
     for group in duplicates:
         is_fuzzy = group in fuzzy_groups
         match_type = "similar" if is_fuzzy else "identical"
-        
+
         # Get file size for reporting
         file_size = group[0].stat().st_size
         print(f"\nFound {match_type} files: '{group[0].name}' ({file_size} bytes)")
-        
+
         if is_fuzzy:
             # For fuzzy matches, show similarity percentages
             base_hash = get_file_hash(group[0], fuzzy=True)
@@ -463,27 +472,31 @@ def process_duplicate_files(duplicates, fuzzy_groups, args):
                 file_hash = get_file_hash(file, fuzzy=True)
                 similarity = ssdeep.compare(base_hash, file_hash)
                 print(f"  {file.name}: {similarity}% similar")
-        
+
         # Sort files by creation time
         files_with_time = [(f, f.stat().st_ctime) for f in group]
         files_with_time.sort(key=lambda x: x[1])
-        
+
         # Keep the oldest file
         original_file = files_with_time[0][0]
-        print(f"Keeping oldest copy: {original_file} (created: {time.ctime(files_with_time[0][1])})")
-        
+        print(
+            f"Keeping oldest copy: {original_file} (created: {time.ctime(files_with_time[0][1])})"
+        )
+
         # Process newer copies
         for file_path, ctime in files_with_time[1:]:
-            print(f"Moving {match_type} file: {file_path} (created: {time.ctime(ctime)})")
+            print(
+                f"Moving {match_type} file: {file_path} (created: {time.ctime(ctime)})"
+            )
             if not args.dry_run:
                 try:
                     # Create backup path maintaining directory structure
                     rel_path = file_path.relative_to(file_path.parent.parent)
                     backup_path = Path(args.backup_dir) / rel_path
-                    
+
                     # Ensure backup directory exists
                     backup_path.parent.mkdir(parents=True, exist_ok=True)
-                    
+
                     # Move the file
                     shutil.move(str(file_path), str(backup_path))
                 except Exception as e:
@@ -547,7 +560,7 @@ def process_duplicate_directories(duplicates, args):
 
 def get_interactive_config():
     """Get configuration through interactive questionary prompts."""
-    
+
     # First, get the action type
     action = questionary.select(
         "What would you like to do?",
@@ -563,9 +576,7 @@ def get_interactive_config():
 
     # Get the directory/files to process
     paths = questionary.path(
-        "Select directory or file to process:",
-        only_directories=False,
-        multiple=True
+        "Select directory or file to process:", only_directories=False, multiple=True
     ).ask()
 
     if not paths:
@@ -574,7 +585,7 @@ def get_interactive_config():
     # Create a namespace object to match argparse structure
     args = argparse.Namespace()
     args.files = paths.split(",") if isinstance(paths, str) else paths
-    
+
     # Set defaults
     args.backup_dir = "_backup"
     args.dry_run = False
@@ -591,11 +602,13 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=["Preview changes (dry run)"]
+            default=["Preview changes (dry run)"],
         ).ask()
 
         args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
-        args.ignore_names = "Ignore filenames (match by content only)" in duplicate_options
+        args.ignore_names = (
+            "Ignore filenames (match by content only)" in duplicate_options
+        )
         args.dry_run = "Preview changes (dry run)" in duplicate_options
         args.verbose = "Show detailed progress" in duplicate_options
 
@@ -607,9 +620,9 @@ def get_interactive_config():
                     "95 - Nearly identical",
                     "90 - Very similar",
                     "85 - Similar",
-                    "80 - Somewhat similar"
+                    "80 - Somewhat similar",
                 ],
-                default="90 - Very similar"
+                default="90 - Very similar",
             ).ask()
             args.fuzzy_threshold = int(args.fuzzy_threshold.split()[0])
 
@@ -620,7 +633,7 @@ def get_interactive_config():
                     "Compare sample rates",
                     "Compare channel counts",
                 ],
-                default=["Compare file lengths", "Compare sample rates"]
+                default=["Compare file lengths", "Compare sample rates"],
             ).ask()
 
         # Get backup options
@@ -629,9 +642,9 @@ def get_interactive_config():
             choices=[
                 "Move to backup directory (safe)",
                 "Delete immediately (dangerous)",
-                "Preview only (no changes)"
+                "Preview only (no changes)",
             ],
-            default="Move to backup directory (safe)"
+            default="Move to backup directory (safe)",
         ).ask()
 
         args.backup_dir = "_backup" if "Move" in backup_choice else None
@@ -642,26 +655,21 @@ def get_interactive_config():
 
     # For sample shrinking, get all the conversion options
     args.bitdepth = questionary.select(
-        "Select target bit depth:",
-        choices=["8", "16", "24"],
-        default="16"
+        "Select target bit depth:", choices=["8", "16", "24"], default="16"
     ).ask()
     args.bitdepth = int(args.bitdepth)
 
     args.channels = questionary.select(
         "Select target channels:",
-        choices=[
-            "1 (mono)",
-            "2 (stereo)"
-        ],
-        default="2 (stereo)"
+        choices=["1 (mono)", "2 (stereo)"],
+        default="2 (stereo)",
     ).ask()
     args.channels = 1 if "1" in args.channels else 2
 
     args.samplerate = questionary.select(
         "Select target sample rate:",
         choices=["22050", "44100", "48000"],
-        default="44100"
+        default="44100",
     ).ask()
     args.samplerate = int(args.samplerate)
 
@@ -673,28 +681,25 @@ def get_interactive_config():
             "Pre-normalize before conversion",
             "Skip generating spectrograms",
             "Preview changes (dry run)",
-            "Process files in parallel"
-        ]
+            "Process files in parallel",
+        ],
     ).ask()
 
     args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
     args.pre_normalize = "Pre-normalize before conversion" in advanced_options
     args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
     args.dry_run = "Preview changes (dry run)" in advanced_options
-    
+
     if "Process files in parallel" in advanced_options:
         args.jobs = questionary.select(
-            "How many parallel jobs?",
-            choices=["2", "4", "8", "16"],
-            default="4"
+            "How many parallel jobs?", choices=["2", "4", "8", "16"], default="4"
         ).ask()
         args.jobs = int(args.jobs)
 
     if args.auto_mono:
         args.auto_mono_threshold = float(
             questionary.text(
-                "Auto-mono threshold in dB (default: -95.5):",
-                default="-95.5"
+                "Auto-mono threshold in dB (default: -95.5):", default="-95.5"
             ).ask()
         )
 
@@ -705,66 +710,70 @@ def process_duplicates(args):
     """Process both directory and file level duplicates with safety checks."""
     print("\nPhase 1: Searching for duplicate directories...")
     dir_duplicates = find_duplicate_directories(args.files)
-    
+
     if dir_duplicates:
-        print(f"\nFound {sum(len(v) - 1 for v in dir_duplicates.values())} duplicate directories")
-        
+        print(
+            f"\nFound {sum(len(v) - 1 for v in dir_duplicates.values())} duplicate directories"
+        )
+
         # Safety check: Verify directory contents match exactly
         verified_duplicates = {}
         for key, paths in dir_duplicates.items():
             dir_name, file_count, total_size = key
-            
+
             # Get file listing for each directory
             dir_contents = defaultdict(list)
             for path in paths:
-                files = sorted(f.relative_to(path) for f in path.rglob("*") if f.is_file())
+                files = sorted(
+                    f.relative_to(path) for f in path.rglob("*") if f.is_file()
+                )
                 content_hash = hashlib.sha256(str(files).encode()).hexdigest()
                 dir_contents[content_hash].append(path)
-            
+
             # Only keep directories with exactly matching contents
             for content_hash, matching_paths in dir_contents.items():
                 if len(matching_paths) > 1:
                     verified_duplicates[key + (content_hash,)] = matching_paths
-        
+
         if args.dry_run:
             print("\nDRY RUN - No directories will be moved")
         process_duplicate_directories(verified_duplicates, args)
     else:
         print("No duplicate directories found.")
-    
+
     print("\nPhase 2: Searching for duplicate files...")
     file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
-    
+
     if file_duplicates:
         total_duplicates = sum(len(group) - 1 for group in file_duplicates)
         print(f"\nFound {total_duplicates} duplicate files")
-        
+
         # Additional safety checks for file processing
         safe_duplicates = []
         for group in file_duplicates:
             # Verify files are not symbolic links
             real_files = [f for f in group if not f.is_symlink()]
-            
+
             # Check if files are in use (on Windows) or locked
             available_files = []
             for file in real_files:
                 try:
-                    with open(file, 'rb') as f:
+                    with open(file, "rb") as f:
                         # Try to get a shared lock
                         pass
                     available_files.append(file)
                 except (IOError, OSError):
                     print(f"Warning: File {file} appears to be in use, skipping")
-            
+
             if len(available_files) > 1:
                 safe_duplicates.append(available_files)
-        
+
         if args.dry_run:
             print("\nDRY RUN - No files will be moved")
         process_duplicate_files(safe_duplicates, fuzzy_groups, args)
     else:
         print("No duplicate files found.")
-    
+
     print("\nDuplicate removal complete!")
 
 
@@ -776,7 +785,7 @@ def main():
     else:
         # Use interactive mode
         action, args = get_interactive_config()
-        
+
     if not args:
         return
 

From bfca75a5acc15458a92ad20d729f37a8704f287c Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:32:20 +0000
Subject: [PATCH 11/66] Nov 17, 2024, 4:32 PM

---
 sample-shrinker-python/requirements.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
index 4a880fc..07fcf64 100644
--- a/sample-shrinker-python/requirements.txt
+++ b/sample-shrinker-python/requirements.txt
@@ -1,4 +1,4 @@
-librosa==0.10.2.post1
-matplotlib==3.9.2
-numpy==2.1.2
-pydub==0.25.1
+librosa
+matplotlib
+numpy
+pydub

From b099b57b4b9a676808331a09330e2d775578ea11 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:32:40 +0000
Subject: [PATCH 12/66] Nov 17, 2024, 4:32 PM

---
 sample-shrinker-python/requirements.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
index 07fcf64..e5a6959 100644
--- a/sample-shrinker-python/requirements.txt
+++ b/sample-shrinker-python/requirements.txt
@@ -1,4 +1,7 @@
-librosa
-matplotlib
-numpy
-pydub
+librosa==0.10.2.post1
+matplotlib==3.9.2
+numpy==2.1.3
+pydub==0.25.1
+questionary==2.0.1
+soundfile==0.12.1
+ssdeep==3.4

From a8e578755356dfb482e02cf0576330cd8f76adf0 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 00:33:24 +0000
Subject: [PATCH 13/66] Nov 17, 2024, 4:33 PM

---
 sample-shrinker-python/README.md | 232 +++++++++++++++++++------------
 1 file changed, 140 insertions(+), 92 deletions(-)

diff --git a/sample-shrinker-python/README.md b/sample-shrinker-python/README.md
index 44c5d3c..26aff3f 100644
--- a/sample-shrinker-python/README.md
+++ b/sample-shrinker-python/README.md
@@ -1,130 +1,178 @@
-
 # Sample Shrinker
 
-A Python script to conditionally batch-convert audio samples into minimal `.wav` files, based on target criteria. This script is useful for saving storage space and reducing the I/O stress during simultaneous real-time streaming of multiple `.wav` files on devices like the Dirtywave M8 tracker.
-
-If you have directories full of 24/32-bit stereo `.wav` files or stereo samples with effectively mono content, this script can reclaim wasted storage space and reduce I/O stress on your SD card. It can also detect if the content of a stereo sample is actually mono and convert it automatically!
+A Python script to conditionally batch-convert audio samples into minimal `.wav` files and manage duplicate audio files. This script is useful for saving storage space, reducing I/O stress during simultaneous real-time streaming of multiple `.wav` files, and cleaning up duplicate samples across your library.
 
 ## Features
-- **Conditional Conversion**: Only converts samples that don't meet the target criteria (bit depth, channels, etc.).
-- **Auto-Mono**: Automatically convert stereo samples to mono if the content is effectively mono, with a configurable threshold.
-- **Backup and Spectrogram Generation**: Converted files are backed up (unless disabled) and spectrograms of old vs. new files are generated.
-- **Pre-Normalization**: Optionally normalize samples before downsampling the bit depth to preserve dynamic range.
-- **Parallel Processing**: Use the `-j` option to process multiple files in parallel for faster conversions.
 
-## Requirements
+### Sample Conversion
+- **Conditional Conversion**: Only converts samples that don't meet the target criteria (bit depth, channels, etc.)
+- **Auto-Mono**: Automatically convert stereo samples to mono if the content is effectively mono
+- **Backup and Spectrogram Generation**: Converted files are backed up with original folder structure preserved
+- **Pre-Normalization**: Optionally normalize samples before downsampling bit depth
+- **Parallel Processing**: Process multiple files simultaneously for faster conversions
 
-- Python 3.10 or later
-- `pydub`, `librosa`, `matplotlib`, `soundfile` (install with `pip`)
-- `ffmpeg` or `libav` installed for `pydub`
+### Duplicate Management
+- **Multi-Level Detection**: Finds duplicates at both directory and file levels
+- **Intelligent Matching**: Uses file size, content hashes, and optional fuzzy matching
+- **Safe Defaults**: Moves duplicates to backup instead of deleting
+- **Fuzzy Audio Matching**: Can detect similar audio files using configurable criteria
+- **Directory Structure**: Maintains original folder structure in backup directory
 
-Install dependencies:
-```bash
-pip install -r requirements.txt
-```
+## Requirements
 
-You will also need `ffmpeg`:
+- Python 3.10 or later
+- Required Python packages (install with `pip install -r requirements.txt`):
+  ```
+  librosa==0.10.2.post1
+  matplotlib==3.9.2
+  numpy==2.1.3
+  pydub==0.25.1
+  questionary==2.0.1
+  ssdeep==3.4
+  ```
+- `ffmpeg` or `libav` installed for audio processing
+
+Install system dependencies:
 ```bash
 # MacOS with Homebrew
-brew install ffmpeg
+brew install ffmpeg ssdeep
 
 # Ubuntu/Debian
-sudo apt install ffmpeg
+sudo apt install ffmpeg ssdeep libfuzzy-dev
 ```
 
 ## Usage
 
+### Interactive Mode
+Simply run the script without arguments for an interactive interface:
 ```bash
-python sample-shrinker.py [options] FILE|DIRECTORY ...
+python sample-shrinker.py
 ```
 
-### Basic Example:
+The interactive mode will guide you through:
+1. Choosing between sample conversion or duplicate removal
+2. Selecting directories/files to process
+3. Configuring operation-specific options
+
+### Command Line Mode
+For automation or scripting:
 ```bash
-python sample-shrinker.py directory_of_samples/
+python sample-shrinker.py [options] FILE|DIRECTORY ...
 ```
 
-This will:
-- Convert samples in place with a target bit depth of 16 and stereo channels unchanged.
-- Back up the original files in a parallel `_backup/` directory.
-- Generate `.png` spectrograms comparing old and new files.
-
-### Options:
-- `-b BIT_DEPTH`: Set the target bit depth (default: 16). Samples will only be downsampled unless `-B` is set.
-- `-B MIN_BIT_DEPTH`: Set a minimum bit depth. This will upsample any samples below the minimum.
-- `-c CHANNELS`: Set the target number of output channels (default: 2). For mono, use `-c 1`.
-- `-r SAMPLERATE`: Set the target sample rate (default: 44100 Hz).
-- `-R MIN_SAMPLERATE`: Set a minimum sample rate. Samples below this will be upsampled.
-- `-a`: Automatically convert stereo samples to mono if they are effectively mono.
-- `-A DB_THRESHOLD`: Set the auto-mono threshold in dB (default: `-95.5`). This implies `-a`.
-- `-p`: Pre-normalize samples before downsampling bit depth.
-- `-S`: Skip generating spectrogram files.
-- `-d BACKUP_DIR`: Set a directory to store backups. Use `-d -` to disable backups and spectrogram generation.
-- `-l`: List files and preview changes without converting.
-- `-n`: Dry run—log actions without converting any files.
-- `-j JOBS`: Process files in parallel with multiple jobs (default: 1).
-- `-v`: Increase verbosity.
+## Sample Conversion Options
+
+### Interactive Configuration
+When choosing "Shrink samples", you can configure:
+- Target bit depth (8, 16, or 24 bit)
+- Channel count (mono or stereo)
+- Sample rate (22050, 44100, or 48000 Hz)
+- Advanced options:
+  - Auto-mono conversion
+  - Pre-normalization
+  - Spectrogram generation
+  - Parallel processing
+  - Dry run preview
+
+### Command Line Options
+- `-b BIT_DEPTH`: Set target bit depth (default: 16)
+- `-B MIN_BIT_DEPTH`: Set minimum bit depth
+- `-c CHANNELS`: Set target channels (1=mono, 2=stereo)
+- `-r SAMPLERATE`: Set target sample rate (default: 44100)
+- `-a`: Enable auto-mono conversion
+- `-p`: Enable pre-normalization
+- `-j JOBS`: Set number of parallel jobs
+- `-n`: Preview changes without converting
+- `-d BACKUP_DIR`: Set backup directory (default: _backup)
+
+## Duplicate Removal Options
+
+### Interactive Configuration
+When choosing "Remove duplicate directories", you can configure:
+- Fuzzy matching options:
+  - Similarity threshold (80-95%)
+  - File length comparison
+  - Sample rate comparison
+  - Channel count comparison
+- Filename handling:
+  - Match by name and content
+  - Match by content only
+- Duplicate handling:
+  - Move to backup (safe)
+  - Delete immediately
+  - Preview only
+
+### Process
+1. **Directory Level**:
+   - Finds directories with matching names
+   - Compares file counts and total sizes
+   - Verifies exact content matches
+   - Keeps oldest copy, moves others to backup
+
+2. **File Level**:
+   - Groups files by size
+   - Performs quick hash comparison
+   - Optionally uses fuzzy matching for similar audio
+   - Maintains original directory structure in backup
+
+### Safety Features
+- Dry run option to preview changes
+- Backup by default instead of deletion
+- Verification of file accessibility
+- Symlink detection
+- Lock checking
+- Detailed progress reporting
 
 ## Examples
 
-### Convert a Directory with Default Settings
+### Basic Sample Conversion
 ```bash
-python sample-shrinker.py my_samples/
-```
-- Convert samples to 16-bit with channels left unchanged.
-- Back up the original files under `_backup/`.
-- Generate spectrogram `.png` files for comparison.
+# Interactive mode (recommended)
+python sample-shrinker.py
 
-### Convert to Mono Automatically for Effectively Mono Samples
-```bash
-python sample-shrinker.py -a my_samples/
+# Command line with specific options
+python sample-shrinker.py -c 1 -b 16 -a samples/
 ```
-- Automatically convert stereo samples to mono if they are effectively mono (i.e., the difference between the channels is below the threshold).
 
-### Preview Changes Without Modifying Files
+### Duplicate Removal
 ```bash
-python sample-shrinker.py -l -a -A -80 my_samples/
-```
-- Lists all files and shows which ones would be changed without actually modifying them. The threshold for auto-mono is set to -80 dB.
+# Interactive mode with guided configuration
+python sample-shrinker.py
 
-### Convert and Skip Backups
-```bash
-python sample-shrinker.py -d - my_samples/
+# Preview duplicate detection
+python sample-shrinker.py samples/ -n
 ```
-- Converts files but does not create backups or generate spectrograms.
 
-### Pre-Normalize Before Downsampling
-```bash
-python sample-shrinker.py -p my_samples/
+### Output Example
 ```
-- Normalize the audio before downsampling the bit depth to preserve as much dynamic range as possible.
-
-### Process Files in Parallel
-```bash
-python sample-shrinker.py -j 10 my_samples/
+Processing file: samples/drums/kick.wav
+samples/drums/kick.wav [CHANGED]: bit depth 24 -> 16, auto-mono
+
+Found duplicate directories named 'drums' with 10 files (1.2MB):
+Keeping oldest copy: samples/drums (created: Thu Mar 21 10:00:00 2024)
+Moving duplicate: samples/backup/drums (created: Thu Mar 21 11:30:00 2024)
+
+Found similar files: 'snare.wav' (250KB)
+Similarity scores:
+  snare_old.wav: 92% similar
+  snare_copy.wav: 95% similar
+Keeping oldest copy: samples/snare.wav
+Moving similar files to backup...
 ```
-- Process up to 10 files at the same time for faster batch conversion.
-
-## Output Example:
 
-```bash
-Processing file: /Volumes/Untitled/Samples/wii sports/sound effects/Baseball/Sample_0028.wav
-/Volumes/Untitled/Samples/wii sports/sound effects/Baseball/Sample_0028.wav [UNCHANGED]
-Processing file: /Volumes/Untitled/Samples/wii sports/sound effects/Boxing/Sample_0029.wav
-/Volumes/Untitled/Samples/wii sports/sound effects/Baseball/Sample_0029.wav [CHANGED]: sample rate 48000 -> 44100
-Processing file: /Volumes/Untitled/Samples/wii sports/sound effects/Boxing/Sample_0030.wav
-/Volumes/Untitled/Samples/wii sports/sound effects/Baseball/Sample_0030.wav[CHANGED]: auto-mono
+## Directory Structure
+```
+samples/                  # Original directory
+  drums/
+    kick.wav
+    snare.wav
+_backup/                 # Backup directory
+  samples/               # Original structure preserved
+    drums/
+      kick.wav.old      # Original files
+      kick.wav.old.png  # Spectrograms
+      kick.wav.new.png
 ```
 
-In the updated output format:
-- The script logs each file being processed with the `Processing file:` prefix.
-- After processing, each file will either be marked as `[UNCHANGED]` or `[CHANGED]` depending on whether any modifications (bit depth, sample rate, or channels) were made.
-- If changes are made, the specific adjustments (e.g., `sample rate 48000 -> 44100`) will be displayed.
-  
-### Additional Details:
-- The `[CHANGED]` notation follows files that were modified.
-- `[UNCHANGED]` appears for files that meet the target criteria and required no modifications.
-- **Changes made**:
-  - Sample rate conversions (e.g., `sample rate 48000 -> 44100`).
-  - Bit depth reductions (e.g., `bit depth 32 -> 16`).
-  - Channel conversions (e.g., stereo to mono).
-- Verbose output (`-v`) will print additional information such as ongoing file processing.
+## Contributing
+Contributions are welcome! Please feel free to submit a Pull Request.

From 1edd19091dbd09ab7f64633e3a2b43c9ad74bd41 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 05:59:12 +0000
Subject: [PATCH 14/66] Nov 17, 2024, 9:59 PM

---
 sample-shrinker-python/sample-shrinker.py | 151 ++++++++++++++--------
 1 file changed, 96 insertions(+), 55 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index ff0e2fe..5d1fa1f 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -14,7 +14,8 @@
 import numpy as np
 import questionary
 import soundfile as sf
-import ssdeep  # Add to imports
+import scipy.signal
+from scipy.io import wavfile
 from pydub import AudioSegment
 
 
@@ -364,11 +365,63 @@ def is_audio_file(file_path):
     return file_path.lower().endswith((".wav", ".mp3"))
 
 
+def get_audio_fingerprint(file_path):
+    """Return a max-normalized, time-averaged spectrogram of the file as its fingerprint, or None on error."""
+    try:
+        # Decode the file with pydub
+        audio = AudioSegment.from_file(file_path)
+        # Downmix multi-channel audio to one channel before analysis
+        if audio.channels > 1:
+            audio = audio.set_channels(1)
+        
+        # Raw sample values as a NumPy array
+        samples = np.array(audio.get_array_of_samples())
+        
+        # Scale to peak amplitude 1. NOTE(review): an all-zero signal divides by zero here (NaN result) - confirm handling
+        samples = samples / np.max(np.abs(samples))
+        
+        # Spectrogram over 1024-sample segments that overlap by 512 samples
+        freqs, times, spectrogram = scipy.signal.spectrogram(
+            samples,
+            audio.frame_rate,
+            nperseg=1024,
+            noverlap=512
+        )
+        
+        # Average each frequency bin over time (axis=1 runs along the spectrogram's time segments)
+        peaks = np.mean(spectrogram, axis=1)
+        # Scale so the largest bin equals 1
+        peaks = peaks / np.max(peaks)
+        
+        return peaks
+    except Exception as e:
+        print(f"Error generating audio fingerprint for {file_path}: {e}")
+        return None
+
+
+def compare_audio_similarity(file1_fingerprint, file2_fingerprint):
+    """Return a 0-100 similarity score: the correlation coefficient of two fingerprints, clamped at 0 and scaled to percent."""
+    if file1_fingerprint is None or file2_fingerprint is None:
+        return 0  # no fingerprint available for one side: report zero similarity
+    
+    # Truncate both fingerprints to the shorter length so they can be correlated element-wise
+    min_len = min(len(file1_fingerprint), len(file2_fingerprint))
+    f1 = file1_fingerprint[:min_len]
+    f2 = file2_fingerprint[:min_len]
+    
+    # The off-diagonal entry of the 2x2 correlation matrix is the f1-vs-f2 coefficient
+    correlation = np.corrcoef(f1, f2)[0, 1]
+    # Clamp negative correlation to 0 and scale to a percentage; a NaN result falls back to 0 below
+    similarity = float(max(0, correlation) * 100)
+    return similarity if not np.isnan(similarity) else 0
+
+
 def find_duplicate_files(paths, args):
-    """Find duplicate files using a multi-stage approach with optional fuzzy matching."""
+    """Find duplicate files using a multi-stage approach with audio fingerprinting."""
     print("Scanning for duplicate files...")
     size_groups = defaultdict(list)
-
+    
+    # First pass: group by size
     for path in paths:
         path = Path(path)
         if path.is_dir():
@@ -378,80 +431,68 @@ def find_duplicate_files(paths, args):
                         print(f"Scanning: {file_path}")
                     size = file_path.stat().st_size
                     size_groups[size].append(file_path)
-
+    
     hash_groups = defaultdict(list)
-    fuzzy_groups = []
-
+    similar_groups = []
+    
+    # Second pass: check content
     for size, file_paths in size_groups.items():
         if len(file_paths) > 1:
             if args.verbose:
                 print(f"\nChecking {len(file_paths)} files of size {size} bytes...")
-
-            # First pass: exact matches
+            
+            # First try exact matches
             for file_path in file_paths:
                 try:
                     file_hash = get_file_hash(file_path, fuzzy=False)
                     if args.ignore_names:
-                        # Use only the hash for grouping if ignoring names
                         hash_groups[file_hash].append(file_path)
                     else:
-                        # Include name in grouping key
                         name_key = file_path.stem.lower()
                         hash_groups[(name_key, file_hash)].append(file_path)
                 except Exception as e:
                     print(f"Error hashing file {file_path}: {e}")
-
-            # Second pass: fuzzy matching if enabled
+            
+            # Then check for similar audio content
             if args.use_fuzzy:
-                unmatched = [
-                    f
-                    for f in file_paths
-                    if not any(f in g for g in hash_groups.values() if len(g) > 1)
-                ]
+                unmatched = [f for f in file_paths 
+                           if not any(f in g for g in hash_groups.values() if len(g) > 1)]
+                
                 if len(unmatched) > 1:
-                    fuzzy_matches = defaultdict(list)
-
+                    # Generate fingerprints for all unmatched files
+                    fingerprints = {}
                     for file_path in unmatched:
-                        try:
-                            audio = AudioSegment.from_file(str(file_path))
-                            fuzzy_key = []
-
-                            if "Compare file lengths" in args.fuzzy_options:
-                                fuzzy_key.append(len(audio))
-                            if "Compare sample rates" in args.fuzzy_options:
-                                fuzzy_key.append(audio.frame_rate)
-                            if "Compare channel counts" in args.fuzzy_options:
-                                fuzzy_key.append(audio.channels)
-
-                            fuzzy_hash = get_file_hash(file_path, fuzzy=True)
-                            if fuzzy_hash:
-                                fuzzy_matches[(tuple(fuzzy_key), fuzzy_hash)].append(
-                                    file_path
+                        fingerprint = get_audio_fingerprint(file_path)
+                        if fingerprint is not None:
+                            fingerprints[file_path] = fingerprint
+                    
+                    # Compare fingerprints
+                    processed = set()
+                    for file1 in fingerprints:
+                        if file1 in processed:
+                            continue
+                        
+                        similar_files = [file1]
+                        for file2 in fingerprints:
+                            if file2 != file1 and file2 not in processed:
+                                similarity = compare_audio_similarity(
+                                    fingerprints[file1],
+                                    fingerprints[file2]
                                 )
-                        except Exception as e:
-                            print(f"Error analyzing {file_path}: {e}")
-
-                    # Compare fuzzy matches
-                    for key, matches in fuzzy_matches.items():
-                        if len(matches) > 1:
-                            base_hash = get_file_hash(matches[0], fuzzy=True)
-                            similar_files = [matches[0]]
-
-                            for other_file in matches[1:]:
-                                other_hash = get_file_hash(other_file, fuzzy=True)
-                                similarity = ssdeep.compare(base_hash, other_hash)
                                 if similarity >= args.fuzzy_threshold:
-                                    similar_files.append(other_file)
-
-                            if len(similar_files) > 1:
-                                fuzzy_groups.append(similar_files)
-
-    # Combine results based on exact and fuzzy matches
+                                    similar_files.append(file2)
+                                    processed.add(file2)
+                        
+                        if len(similar_files) > 1:
+                            similar_groups.append(similar_files)
+                            processed.add(file1)
+    
+    # Combine results
     duplicates = [group for group in hash_groups.values() if len(group) > 1]
     if args.use_fuzzy:
-        duplicates.extend(fuzzy_groups)
-
-    return duplicates, fuzzy_groups
+        duplicates.extend(similar_groups)
+    
+    return duplicates, similar_groups
 
 
 def process_duplicate_files(duplicates, fuzzy_groups, args):

From 42f4cdf9c039f7f44cabdd30566e8b5180269c78 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:02:24 +0000
Subject: [PATCH 15/66] Nov 17, 2024, 10:02 PM

---
 sample-shrinker-python/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
index e5a6959..a1f5fa2 100644
--- a/sample-shrinker-python/requirements.txt
+++ b/sample-shrinker-python/requirements.txt
@@ -4,4 +4,4 @@ numpy==2.1.3
 pydub==0.25.1
 questionary==2.0.1
 soundfile==0.12.1
-ssdeep==3.4
+scipy>=1.11.0
\ No newline at end of file

From f714be1787d6c2e9c06eb4ac9d0db4e1dfbedea6 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:03:34 +0000
Subject: [PATCH 16/66] Nov 17, 2024, 10:03 PM

---
 sample-shrinker-python/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
index a1f5fa2..ee3cb89 100644
--- a/sample-shrinker-python/requirements.txt
+++ b/sample-shrinker-python/requirements.txt
@@ -1,6 +1,6 @@
 librosa==0.10.2.post1
 matplotlib==3.9.2
-numpy==2.1.3
+numpy
 pydub==0.25.1
 questionary==2.0.1
 soundfile==0.12.1

From df3cae0675587cfb11ffbcf41d6cd571261781bc Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:04:23 +0000
Subject: [PATCH 17/66] Nov 17, 2024, 10:04 PM

---
 sample-shrinker-python/sample-shrinker.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 5d1fa1f..46edccc 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -4,6 +4,7 @@
 import hashlib
 import os
 import shutil
+import sys
 import time
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -333,14 +334,10 @@ def run_in_parallel(file_list, args):
 
 
 def get_file_hash(file_path, fuzzy=False, chunk_size=1024 * 1024):
-    """Calculate file hash using either SHA-256 or fuzzy hashing."""
+    """Calculate file hash using either SHA-256 or audio fingerprinting."""
     if fuzzy:
-        try:
-            # Generate fuzzy hash for the file
-            return ssdeep.hash_from_file(str(file_path))
-        except Exception as e:
-            print(f"Error generating fuzzy hash for {file_path}: {e}")
-            return None
+        # Use our audio fingerprinting instead of ssdeep
+        return get_audio_fingerprint(file_path)
     else:
         # Standard SHA-256 hash with quick check
         sha256_hash = hashlib.sha256()
@@ -507,12 +504,12 @@ def process_duplicate_files(duplicates, fuzzy_groups, args):
 
         if is_fuzzy:
             # For fuzzy matches, show similarity percentages
-            base_hash = get_file_hash(group[0], fuzzy=True)
+            base_fingerprint = get_audio_fingerprint(group[0])
             print("Similarity scores:")
             for file in group[1:]:
-                file_hash = get_file_hash(file, fuzzy=True)
-                similarity = ssdeep.compare(base_hash, file_hash)
-                print(f"  {file.name}: {similarity}% similar")
+                file_fingerprint = get_audio_fingerprint(file)
+                similarity = compare_audio_similarity(base_fingerprint, file_fingerprint)
+                print(f"  {file.name}: {similarity:.1f}% similar")
 
         # Sort files by creation time
         files_with_time = [(f, f.stat().st_ctime) for f in group]

From 86f2c8800426ae42bedbf9986b73a7d565ff17ed Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:07:23 +0000
Subject: [PATCH 18/66] Nov 17, 2024, 10:07 PM

---
 sample-shrinker-python/sample-shrinker.py | 25 ++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 46edccc..8796cd8 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -598,7 +598,7 @@ def process_duplicate_directories(duplicates, args):
 
 def get_interactive_config():
     """Get configuration through interactive questionary prompts."""
-
+    
     # First, get the action type
     action = questionary.select(
         "What would you like to do?",
@@ -613,16 +613,31 @@ def get_interactive_config():
         return None, None
 
     # Get the directory/files to process
-    paths = questionary.path(
-        "Select directory or file to process:", only_directories=False, multiple=True
-    ).ask()
+    paths = []
+    while True:
+        path = questionary.path(
+            "Select directory or file to process (press Enter with empty path when done):",
+            only_directories=False,
+        ).ask()
+        
+        if not path:  # Empty input
+            if paths:  # If we have at least one path, break
+                break
+            else:  # If no paths yet, ask again
+                print("Please select at least one directory or file.")
+                continue
+        
+        paths.append(path)
+        
+        if not questionary.confirm("Add another path?", default=False).ask():
+            break
 
     if not paths:
         return None, None
 
     # Create a namespace object to match argparse structure
     args = argparse.Namespace()
-    args.files = paths.split(",") if isinstance(paths, str) else paths
+    args.files = paths
 
     # Set defaults
     args.backup_dir = "_backup"

From 94d5a2af1c3ae3049803006360f955241adff92d Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:08:14 +0000
Subject: [PATCH 19/66] Nov 17, 2024, 10:08 PM

---
 sample-shrinker-python/sample-shrinker.py | 42 ++++++++++++++++++++---
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 8796cd8..5f21d6a 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -639,11 +639,22 @@ def get_interactive_config():
     args = argparse.Namespace()
     args.files = paths
 
-    # Set defaults
+    # Set ALL default values (matching parse_args defaults)
     args.backup_dir = "_backup"
     args.dry_run = False
     args.verbose = False
     args.ext = "wav,mp3"
+    args.bitdepth = 16
+    args.min_bitdepth = None
+    args.channels = 2
+    args.samplerate = 44100
+    args.min_samplerate = None
+    args.auto_mono = False
+    args.auto_mono_threshold = -95.5
+    args.skip_spectrograms = False
+    args.pre_normalize = False
+    args.list = False
+    args.jobs = 1
 
     if action == "Remove duplicate directories":
         # For duplicate removal, get configuration options
@@ -708,7 +719,9 @@ def get_interactive_config():
 
     # For sample shrinking, get all the conversion options
     args.bitdepth = questionary.select(
-        "Select target bit depth:", choices=["8", "16", "24"], default="16"
+        "Select target bit depth:", 
+        choices=["8", "16", "24"], 
+        default="16"
     ).ask()
     args.bitdepth = int(args.bitdepth)
 
@@ -735,6 +748,8 @@ def get_interactive_config():
             "Skip generating spectrograms",
             "Preview changes (dry run)",
             "Process files in parallel",
+            "Set minimum sample rate",
+            "Set minimum bit depth"
         ],
     ).ask()
 
@@ -745,14 +760,33 @@ def get_interactive_config():
 
     if "Process files in parallel" in advanced_options:
         args.jobs = questionary.select(
-            "How many parallel jobs?", choices=["2", "4", "8", "16"], default="4"
+            "How many parallel jobs?", 
+            choices=["2", "4", "8", "16"], 
+            default="4"
         ).ask()
         args.jobs = int(args.jobs)
 
+    if "Set minimum sample rate" in advanced_options:
+        args.min_samplerate = questionary.select(
+            "Select minimum sample rate:",
+            choices=["22050", "44100", "48000"],
+            default="22050"
+        ).ask()
+        args.min_samplerate = int(args.min_samplerate)
+
+    if "Set minimum bit depth" in advanced_options:
+        args.min_bitdepth = questionary.select(
+            "Select minimum bit depth:",
+            choices=["8", "16", "24"],
+            default="16"
+        ).ask()
+        args.min_bitdepth = int(args.min_bitdepth)
+
     if args.auto_mono:
         args.auto_mono_threshold = float(
             questionary.text(
-                "Auto-mono threshold in dB (default: -95.5):", default="-95.5"
+                "Auto-mono threshold in dB (default: -95.5):", 
+                default="-95.5"
             ).ask()
         )
 

From f6d781ffb6c7a0c774f70b559154662861e597e7 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:09:18 +0000
Subject: [PATCH 20/66] Nov 17, 2024, 10:09 PM

---
 sample-shrinker-python/sample-shrinker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 5f21d6a..5cab372 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -870,7 +870,7 @@ def main():
         args = parse_args()
         action = "shrink"  # Default to shrink mode for command line
     else:
-        # Use interactive mode
+        # Use interactive mode with saved configuration
         action, args = get_interactive_config()
 
     if not args:

From 3015985e63e9c0914b08f93c87d633ab306f9bcd Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:18:36 +0000
Subject: [PATCH 21/66] Nov 17, 2024, 10:18 PM

---
 sample-shrinker-python/README.md | 55 ++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/sample-shrinker-python/README.md b/sample-shrinker-python/README.md
index 26aff3f..20f36af 100644
--- a/sample-shrinker-python/README.md
+++ b/sample-shrinker-python/README.md
@@ -13,9 +13,9 @@ A Python script to conditionally batch-convert audio samples into minimal `.wav`
 
 ### Duplicate Management
 - **Multi-Level Detection**: Finds duplicates at both directory and file levels
-- **Intelligent Matching**: Uses file size, content hashes, and optional fuzzy matching
+- **Intelligent Matching**: Uses file size, content hashes, and audio fingerprinting
+- **Audio Fingerprinting**: Uses spectral analysis to detect similar audio content
 - **Safe Defaults**: Moves duplicates to backup instead of deleting
-- **Fuzzy Audio Matching**: Can detect similar audio files using configurable criteria
 - **Directory Structure**: Maintains original folder structure in backup directory
 
 ## Requirements
@@ -25,34 +25,36 @@ A Python script to conditionally batch-convert audio samples into minimal `.wav`
   ```
   librosa==0.10.2.post1
   matplotlib==3.9.2
-  numpy==2.1.2
+  numpy
   pydub==0.25.1
   questionary==2.0.1
-  ssdeep==3.4
+  soundfile==0.12.1
+  scipy>=1.11.0
   ```
 - `ffmpeg` or `libav` installed for audio processing
 
 Install system dependencies:
 ```bash
 # MacOS with Homebrew
-brew install ffmpeg ssdeep
+brew install ffmpeg
 
 # Ubuntu/Debian
-sudo apt install ffmpeg ssdeep
+sudo apt install ffmpeg
 ```
 
 ## Usage
 
-### Interactive Mode
-Simply run the script without arguments for an interactive interface:
+### Interactive Mode (Recommended)
+Simply run the script without arguments:
 ```bash
 python sample-shrinker.py
 ```
 
-The interactive mode will guide you through:
+The interactive interface will guide you through:
 1. Choosing between sample conversion or duplicate removal
-2. Selecting directories/files to process
+2. Selecting directories/files to process (add multiple paths)
 3. Configuring operation-specific options
+4. Setting advanced parameters
 
 ### Command Line Mode
 For automation or scripting:
@@ -63,7 +65,7 @@ python sample-shrinker.py [options] FILE|DIRECTORY ...
 ## Sample Conversion Options
 
 ### Interactive Configuration
-When choosing "Shrink samples", you can configure:
+When choosing "Shrink samples", configure:
 - Target bit depth (8, 16, or 24 bit)
 - Channel count (mono or stereo)
 - Sample rate (22050, 44100, or 48000 Hz)
@@ -72,6 +74,8 @@ When choosing "Shrink samples", you can configure:
   - Pre-normalization
   - Spectrogram generation
   - Parallel processing
+  - Minimum sample rate
+  - Minimum bit depth
   - Dry run preview
 
 ### Command Line Options
@@ -79,6 +83,7 @@ When choosing "Shrink samples", you can configure:
 - `-B MIN_BIT_DEPTH`: Set minimum bit depth
 - `-c CHANNELS`: Set target channels (1=mono, 2=stereo)
 - `-r SAMPLERATE`: Set target sample rate (default: 44100)
+- `-R MIN_SAMPLERATE`: Set minimum sample rate
 - `-a`: Enable auto-mono conversion
 - `-p`: Enable pre-normalization
 - `-j JOBS`: Set number of parallel jobs
@@ -88,8 +93,8 @@ When choosing "Shrink samples", you can configure:
 ## Duplicate Removal Options
 
 ### Interactive Configuration
-When choosing "Remove duplicates", you can configure:
-- Fuzzy matching options:
+When choosing "Remove duplicates", configure:
+- Audio matching options:
   - Similarity threshold (80-95%)
   - File length comparison
   - Sample rate comparison
@@ -102,7 +107,7 @@ When choosing "Remove duplicates", you can configure:
   - Delete immediately
   - Preview only
 
-### Process
+### Detection Process
 1. **Directory Level**:
    - Finds directories with matching names
    - Compares file counts and total sizes
@@ -110,11 +115,18 @@ When choosing "Remove duplicates", you can configure:
    - Keeps oldest copy, moves others to backup
 
 2. **File Level**:
-   - Groups files by size
-   - Performs quick hash comparison
-   - Optionally uses fuzzy matching for similar audio
+   - Groups files by size (fast initial filter)
+   - Performs quick hash comparison for exact matches
+   - Uses audio fingerprinting for similar content detection
    - Maintains original directory structure in backup
 
+### Audio Fingerprinting
+- Converts audio to mono for comparison
+- Generates spectral fingerprints
+- Compares frequency content
+- Provides similarity scores as percentages
+- Configurable similarity threshold
+
 ### Safety Features
 - Dry run option to preview changes
 - Backup by default instead of deletion
@@ -122,12 +134,13 @@ When choosing "Remove duplicates", you can configure:
 - Symlink detection
 - Lock checking
 - Detailed progress reporting
+- Original folder structure preserved in backups
 
 ## Examples
 
 ### Basic Sample Conversion
 ```bash
-# Interactive mode (recommended)
+# Interactive mode with guided configuration
 python sample-shrinker.py
 
 # Command line with specific options
@@ -136,7 +149,7 @@ python sample-shrinker.py -c 1 -b 16 -a samples/
 
 ### Duplicate Removal
 ```bash
-# Interactive mode with guided configuration
+# Interactive mode (recommended)
 python sample-shrinker.py
 
 # Preview duplicate detection
@@ -154,8 +167,8 @@ Moving duplicate: samples/backup/drums (created: Thu Mar 21 11:30:00 2024)
 
 Found similar files: 'snare.wav' (250KB)
 Similarity scores:
-  snare_old.wav: 92% similar
-  snare_copy.wav: 95% similar
+  snare_old.wav: 92.5% similar
+  snare_copy.wav: 95.8% similar
 Keeping oldest copy: samples/snare.wav
 Moving similar files to backup...
 ```

From 34c3cfbbfc71f41fac9859f49881341a0e65e6c8 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:18:51 +0000
Subject: [PATCH 22/66] Nov 17, 2024, 10:18 PM

---
 sample-shrinker-python/requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
index ee3cb89..ac87aa6 100644
--- a/sample-shrinker-python/requirements.txt
+++ b/sample-shrinker-python/requirements.txt
@@ -1,7 +1,7 @@
 librosa==0.10.2.post1
 matplotlib==3.9.2
-numpy
+numpy==2.1.3
 pydub==0.25.1
 questionary==2.0.1
+scipy==1.14.1
 soundfile==0.12.1
-scipy>=1.11.0
\ No newline at end of file

From 62d37e8c76e8305ab361d8d8771c03a3cdc6e4b1 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:24:51 +0000
Subject: [PATCH 23/66] Nov 17, 2024, 10:24 PM

---
 sample-shrinker-python/sample-shrinker.py | 130 ++++++++++++----------
 1 file changed, 74 insertions(+), 56 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 5cab372..7542910 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -18,6 +18,11 @@
 import scipy.signal
 from scipy.io import wavfile
 from pydub import AudioSegment
+from rich.console import Console
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
+from rich.panel import Panel
+from rich.text import Text
+from rich import print as rprint
 
 
 def usage_intro():
@@ -145,10 +150,14 @@ def reencode_audio(file_path):
     return None
 
 
-def process_audio(file_path, args, dry_run=False):
+def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
     """Main function to process audio files based on arguments."""
     try:
-        print(f"Processing file: {file_path}")
+        if progress:
+            progress.update(task_id, description=f"Processing: {Path(file_path).name}")
+        else:
+            console.print(f"Processing file: [cyan]{file_path}[/cyan]")
+            
         audio = AudioSegment.from_file(file_path)
         modified = False
         change_reason = []
@@ -201,7 +210,12 @@ def process_audio(file_path, args, dry_run=False):
             modified = True
 
         if modified:
-            print(f"{file_path} [CHANGED]: {', '.join(change_reason)}")
+            status = Text()
+            status.append(f"{file_path} ", style="cyan")
+            status.append("[CHANGED]: ", style="yellow")
+            status.append(", ".join(change_reason), style="green")
+            console.print(status)
+            
             if not dry_run:
                 # Backup the original file if required
                 if args.backup_dir != "-":
@@ -223,10 +237,13 @@ def process_audio(file_path, args, dry_run=False):
                         file_path, output_file, os.path.dirname(backup_path)
                     )
         else:
-            print(f"{file_path} [UNCHANGED]")
+            status = Text()
+            status.append(f"{file_path} ", style="cyan")
+            status.append("[UNCHANGED]", style="blue")
+            console.print(status)
 
     except Exception as e:
-        print(f"Error processing {file_path}: {e}")
+        console.print(f"[red]Error processing {file_path}: {e}[/red]")
 
         # Try re-encoding the file if ffmpeg failed
         reencoded_file = reencode_audio(file_path)
@@ -235,8 +252,8 @@ def process_audio(file_path, args, dry_run=False):
                 # Retry the process with the re-encoded file
                 process_audio(reencoded_file, args, dry_run)
             except Exception as retry_error:
-                print(
-                    f"Failed to process the re-encoded file {reencoded_file}: {retry_error}"
+                console.print(
+                    f"[red]Failed to process the re-encoded file {reencoded_file}: {retry_error}[/red]"
                 )
 
 
@@ -313,22 +330,38 @@ def collect_files(args):
 
 
 def run_in_parallel(file_list, args):
-    """Run the audio processing in parallel."""
+    """Run the audio processing in parallel with progress bar."""
     try:
-        with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-            futures = {
-                executor.submit(process_audio, file, args): file for file in file_list
-            }
-            for future in concurrent.futures.as_completed(futures):
-                try:
-                    result = (
-                        future.result()
-                    )  # Get the result of the future (processed file)
-                except Exception as exc:
-                    file = futures[future]
-                    print(f"File {file} generated an exception: {exc}")
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TaskProgressColumn(),
+            console=console,
+        ) as progress:
+            task = progress.add_task("Processing files...", total=len(file_list))
+            
+            with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                futures = {
+                    executor.submit(
+                        process_audio, 
+                        file, 
+                        args,
+                        task_id=task,
+                        progress=progress
+                    ): file for file in file_list
+                }
+                
+                for future in concurrent.futures.as_completed(futures):
+                    progress.advance(task)
+                    try:
+                        result = future.result()
+                    except Exception as exc:
+                        file = futures[future]
+                        console.print(f"[red]File {file} generated an exception: {exc}[/red]")
+                        
     except KeyboardInterrupt:
-        print("Received KeyboardInterrupt, attempting to cancel all threads...")
+        console.print("[yellow]Received KeyboardInterrupt, attempting to cancel all threads...[/yellow]")
         executor.shutdown(wait=False, cancel_futures=True)
         raise
 
@@ -794,47 +827,32 @@ def get_interactive_config():
 
 
 def process_duplicates(args):
-    """Process both directory and file level duplicates with safety checks."""
-    print("\nPhase 1: Searching for duplicate directories...")
-    dir_duplicates = find_duplicate_directories(args.files)
+    """Process both directory and file level duplicates with visual feedback."""
+    with console.status("[bold green]Phase 1: Searching for duplicate directories...") as status:
+        dir_duplicates = find_duplicate_directories(args.files)
 
     if dir_duplicates:
-        print(
-            f"\nFound {sum(len(v) - 1 for v in dir_duplicates.values())} duplicate directories"
-        )
-
-        # Safety check: Verify directory contents match exactly
-        verified_duplicates = {}
-        for key, paths in dir_duplicates.items():
-            dir_name, file_count, total_size = key
-
-            # Get file listing for each directory
-            dir_contents = defaultdict(list)
-            for path in paths:
-                files = sorted(
-                    f.relative_to(path) for f in path.rglob("*") if f.is_file()
-                )
-                content_hash = hashlib.sha256(str(files).encode()).hexdigest()
-                dir_contents[content_hash].append(path)
-
-            # Only keep directories with exactly matching contents
-            for content_hash, matching_paths in dir_contents.items():
-                if len(matching_paths) > 1:
-                    verified_duplicates[key + (content_hash,)] = matching_paths
-
+        count = sum(len(v) - 1 for v in dir_duplicates.values())
+        console.print(Panel(f"Found [cyan]{count}[/cyan] duplicate directories", 
+                          title="Directory Scan Complete"))
+        
         if args.dry_run:
-            print("\nDRY RUN - No directories will be moved")
+            console.print("[yellow]DRY RUN - No directories will be moved[/yellow]")
         process_duplicate_directories(verified_duplicates, args)
     else:
-        print("No duplicate directories found.")
+        console.print("[blue]No duplicate directories found.[/blue]")
 
-    print("\nPhase 2: Searching for duplicate files...")
-    file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
+    with console.status("[bold green]Phase 2: Searching for duplicate files...") as status:
+        file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
 
     if file_duplicates:
         total_duplicates = sum(len(group) - 1 for group in file_duplicates)
-        print(f"\nFound {total_duplicates} duplicate files")
-
+        console.print(Panel(
+            f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
+            f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
+            title="File Scan Complete"
+        ))
+        
         # Additional safety checks for file processing
         safe_duplicates = []
         for group in file_duplicates:
@@ -856,12 +874,12 @@ def process_duplicates(args):
                 safe_duplicates.append(available_files)
 
         if args.dry_run:
-            print("\nDRY RUN - No files will be moved")
+            console.print("[yellow]DRY RUN - No files will be moved[/yellow]")
         process_duplicate_files(safe_duplicates, fuzzy_groups, args)
     else:
-        print("No duplicate files found.")
+        console.print("[blue]No duplicate files found.[/blue]")
 
-    print("\nDuplicate removal complete!")
+    console.print("[green]Duplicate removal complete![/green]")
 
 
 def main():

From 8135da880972d8673ed98d9422bf577e81f69a26 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:26:10 +0000
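Subject: [PATCH 24/66] Nov 17, 2024, 10:26 PM

Interactive duplicate removal: prompt for the backup directory (default
"_backup", also used when the answer is empty) before asking how
duplicates should be handled, and show the chosen directory in the
"Move to ..." choice. Offer parallel job counts up to 64.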

---
 sample-shrinker-python/sample-shrinker.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 7542910..7ca1812 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -733,18 +733,28 @@ def get_interactive_config():
                 default=["Compare file lengths", "Compare sample rates"],
             ).ask()
 
-        # Get backup options
+        # Get backup options (moved before backup_choice)
+        backup_dir = questionary.text(
+            "Backup directory path:",
+            default="_backup",
+            description="Directory where duplicates will be moved"
+        ).ask()
+        
+        if backup_dir.strip():  # If not empty
+            args.backup_dir = backup_dir.strip()
+        else:
+            args.backup_dir = "_backup"  # Fallback to default
+
         backup_choice = questionary.select(
             "How should duplicates be handled?",
             choices=[
-                "Move to backup directory (safe)",
+                f"Move to {args.backup_dir} (safe)",
                 "Delete immediately (dangerous)",
                 "Preview only (no changes)",
             ],
-            default="Move to backup directory (safe)",
+            default=f"Move to {args.backup_dir} (safe)",
         ).ask()
 
-        args.backup_dir = "_backup" if "Move" in backup_choice else None
         args.delete_duplicates = "Delete" in backup_choice
         args.dry_run = "Preview" in backup_choice
 
@@ -794,8 +804,9 @@ def get_interactive_config():
     if "Process files in parallel" in advanced_options:
         args.jobs = questionary.select(
             "How many parallel jobs?", 
-            choices=["2", "4", "8", "16"], 
-            default="4"
+            choices=["2", "4", "8", "16", "24", "32", "48", "64"], 
+            default="4",
+            description="Higher values may improve speed but use more memory"
         ).ask()
         args.jobs = int(args.jobs)
 

From ae57b21d4f68daaf58566fe2d6e8c2d8210c0fa9 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:28:38 +0000
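Subject: [PATCH 25/66] Nov 17, 2024, 10:28 PM

Create a module-level rich Console. Move the parallel-jobs hint into
the prompt text instead of passing a `description` keyword to
questionary.select().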

---
 sample-shrinker-python/sample-shrinker.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 7ca1812..c2de9fa 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -24,6 +24,9 @@
 from rich.text import Text
 from rich import print as rprint
 
+# Initialize console
+console = Console()
+
 
 def usage_intro():
     return """
@@ -803,10 +806,9 @@ def get_interactive_config():
 
     if "Process files in parallel" in advanced_options:
         args.jobs = questionary.select(
-            "How many parallel jobs?", 
+            "How many parallel jobs? (higher values may improve speed but use more memory)",
             choices=["2", "4", "8", "16", "24", "32", "48", "64"], 
-            default="4",
-            description="Higher values may improve speed but use more memory"
+            default="4"
         ).ask()
         args.jobs = int(args.jobs)
 

From 2bafb23a591d0cfe5739696be7490682bfd73875 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:38:00 +0000
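Subject: [PATCH 26/66] Nov 17, 2024, 10:38 PM

collect_files(): expand "~" and environment variables, resolve each
input path, and report progress on the console (one line per file when
verbose). run_in_parallel(): return early when there are no files,
print start/complete messages, and report unexpected errors before
re-raising them.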

---
 sample-shrinker-python/sample-shrinker.py | 54 ++++++++++++++++-------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index c2de9fa..f33f5c0 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -310,30 +310,44 @@ def list_files(args, file_list):
 def collect_files(args):
     """Collect all wav and mp3 files from provided directories and files."""
     file_list = []
-    # Split extensions string into a list and clean up whitespace
     valid_extensions = [ext.strip().lower() for ext in args.ext.split(",")]
-
+    
+    console.print("[cyan]Starting file collection...[/cyan]")
+    
     for path in args.files:
-        if os.path.isdir(path):
+        # Expand user and resolve path
+        path = os.path.expanduser(path)
+        path = os.path.expandvars(path)
+        path = Path(path).resolve()
+        
+        console.print(f"[cyan]Scanning path: {path}[/cyan]")
+        
+        if path.is_dir():
             for root, dirs, files in os.walk(path):
                 for file in files:
                     file_lower = file.lower()
-                    # Check if file ends with any of the valid extensions
-                    if any(
-                        file_lower.endswith(f".{ext}") for ext in valid_extensions
-                    ) and not file.startswith("._"):
-                        file_list.append(os.path.join(root, file))
-        elif os.path.isfile(path):
-            file_lower = path.lower()
-            if any(
-                file_lower.endswith(f".{ext}") for ext in valid_extensions
-            ) and not os.path.basename(path).startswith("._"):
-                file_list.append(path)
+                    if any(file_lower.endswith(f".{ext}") for ext in valid_extensions) and not file.startswith("._"):
+                        full_path = os.path.join(root, file)
+                        file_list.append(full_path)
+                        if args.verbose:
+                            console.print(f"[dim]Found: {full_path}[/dim]")
+        elif path.is_file():
+            file_lower = str(path).lower()
+            if any(file_lower.endswith(f".{ext}") for ext in valid_extensions) and not path.name.startswith("._"):
+                file_list.append(str(path))
+                if args.verbose:
+                    console.print(f"[dim]Found: {path}[/dim]")
+    
+    console.print(f"[green]Found {len(file_list)} files to process[/green]")
     return file_list
 
 
 def run_in_parallel(file_list, args):
     """Run the audio processing in parallel with progress bar."""
+    if not file_list:
+        console.print("[yellow]No files to process![/yellow]")
+        return
+        
     try:
         with Progress(
             SpinnerColumn(),
@@ -342,9 +356,13 @@ def run_in_parallel(file_list, args):
             TaskProgressColumn(),
             console=console,
         ) as progress:
-            task = progress.add_task("Processing files...", total=len(file_list))
+            total_files = len(file_list)
+            console.print(f"[cyan]Starting processing of {total_files} files with {args.jobs} parallel jobs[/cyan]")
+            
+            task = progress.add_task("Processing files...", total=total_files)
             
             with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                # Submit all tasks
                 futures = {
                     executor.submit(
                         process_audio, 
@@ -355,6 +373,7 @@ def run_in_parallel(file_list, args):
                     ): file for file in file_list
                 }
                 
+                # Process completed tasks
                 for future in concurrent.futures.as_completed(futures):
                     progress.advance(task)
                     try:
@@ -362,11 +381,16 @@ def run_in_parallel(file_list, args):
                     except Exception as exc:
                         file = futures[future]
                         console.print(f"[red]File {file} generated an exception: {exc}[/red]")
+                    
+            console.print("[green]Processing complete![/green]")
                         
     except KeyboardInterrupt:
         console.print("[yellow]Received KeyboardInterrupt, attempting to cancel all threads...[/yellow]")
         executor.shutdown(wait=False, cancel_futures=True)
         raise
+    except Exception as e:
+        console.print(f"[red]Error in parallel processing: {e}[/red]")
+        raise
 
 
 def get_file_hash(file_path, fuzzy=False, chunk_size=1024 * 1024):

From 797b1f89ea57cb6aeacef841ed169b7098f6cf93 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:41:33 +0000
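Subject: [PATCH 27/66] Nov 17, 2024, 10:41 PM

Re-encode unreadable files with ffmpeg (pcm_s16le, 44.1 kHz) through
subprocess instead of soundfile. Add check_ffmpeg() and call it at the
start of main(). In process_audio(), retry loading from the re-encoded
file when the initial load raises IndexError or OSError; the outer
error handler now prints a stack trace instead of retrying.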

---
 sample-shrinker-python/sample-shrinker.py | 85 +++++++++++++++--------
 1 file changed, 57 insertions(+), 28 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index f33f5c0..2f49783 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -9,6 +9,7 @@
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
+import subprocess
 
 import librosa
 import matplotlib.pyplot as plt
@@ -135,22 +136,40 @@ def delete_resource_forks(directory):
 def reencode_audio(file_path):
     """Re-encode audio file to PCM 16-bit if it has a different encoding."""
     try:
-        with sf.SoundFile(file_path) as f:
-            print(
-                f"Audio encoding: {f.format}, subtype: {f.subtype}, channels: {f.channels}"
-            )
-            if f.subtype != "PCM_16":
-                # If the file is not PCM 16, re-save it as PCM_16
-                data, samplerate = sf.read(file_path)
-                temp_output = file_path.replace(
-                    os.path.splitext(file_path)[1], "_reencoded.wav"
-                )
-                sf.write(temp_output, data, samplerate, subtype="PCM_16")
-                print(f"File re-encoded to PCM_16: {file_path} -> {temp_output}")
-                return temp_output
+        output_path = str(Path(file_path).with_suffix('.reencoded.wav'))
+        # Use ffmpeg directly for more reliable conversion
+        cmd = [
+            'ffmpeg', '-y',
+            '-i', str(file_path),
+            '-acodec', 'pcm_s16le',
+            '-ar', '44100',
+            output_path
+        ]
+        
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            console.print(f"[green]Successfully re-encoded: {output_path}[/green]")
+            return output_path
+        else:
+            console.print(f"[red]FFmpeg error: {result.stderr}[/red]")
+            return None
     except Exception as e:
-        print(f"Error re-encoding {file_path}: {e}")
-    return None
+        console.print(f"[red]Error re-encoding {file_path}: {str(e)}[/red]")
+        return None
+
+
+def check_ffmpeg():
+    """Check if ffmpeg is available and properly installed."""
+    try:
+        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
+        return True
+    except (subprocess.SubprocessError, FileNotFoundError):
+        console.print("[red]Error: ffmpeg is not installed or not found in PATH[/red]")
+        console.print("Please install ffmpeg:")
+        console.print("  MacOS: brew install ffmpeg")
+        console.print("  Ubuntu/Debian: sudo apt install ffmpeg")
+        console.print("  Windows: https://ffmpeg.org/download.html")
+        return False
 
 
 def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
@@ -160,8 +179,22 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             progress.update(task_id, description=f"Processing: {Path(file_path).name}")
         else:
             console.print(f"Processing file: [cyan]{file_path}[/cyan]")
+        
+        try:
+            audio = AudioSegment.from_file(file_path)
+        except (IndexError, OSError) as e:
+            console.print(f"[red]Error loading {file_path}: {str(e)}[/red]")
+            console.print("[yellow]Attempting to re-encode file...[/yellow]")
+            reencoded_file = reencode_audio(file_path)
+            if reencoded_file:
+                try:
+                    audio = AudioSegment.from_file(reencoded_file)
+                except Exception as re_err:
+                    console.print(f"[red]Failed to process re-encoded file: {str(re_err)}[/red]")
+                    return
+            else:
+                return
             
-        audio = AudioSegment.from_file(file_path)
         modified = False
         change_reason = []
 
@@ -246,18 +279,10 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             console.print(status)
 
     except Exception as e:
-        console.print(f"[red]Error processing {file_path}: {e}[/red]")
-
-        # Try re-encoding the file if ffmpeg failed
-        reencoded_file = reencode_audio(file_path)
-        if reencoded_file:
-            try:
-                # Retry the process with the re-encoded file
-                process_audio(reencoded_file, args, dry_run)
-            except Exception as retry_error:
-                console.print(
-                    f"[red]Failed to process the re-encoded file {reencoded_file}: {retry_error}[/red]"
-                )
+        console.print(f"[red]Error processing {file_path}: {str(e)}[/red]")
+        console.print(f"[yellow]Stack trace:[/yellow]")
+        import traceback
+        console.print(traceback.format_exc())
 
 
 def check_effectively_mono(audio, threshold_dB):
@@ -920,6 +945,10 @@ def process_duplicates(args):
 
 
 def main():
+    # Check for ffmpeg first
+    if not check_ffmpeg():
+        return
+
     # Check if command line arguments were provided
     if len(sys.argv) > 1:
         args = parse_args()

From 8c9a2f65cd747205974f31d76d22a9fae1fa9fa8 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:43:20 +0000
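Subject: [PATCH 28/66] Nov 17, 2024, 10:43 PM

Rework the backup step in process_audio(): copy the original into the
backup directory with an added ".old" suffix, generate spectrograms
next to the backup, and skip the file if the backup fails. Export the
converted audio to the original path, and print stack traces only when
verbose is set.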

---
 sample-shrinker-python/sample-shrinker.py | 61 ++++++++++++++++-------
 1 file changed, 43 insertions(+), 18 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 2f49783..16b4bbc 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -255,23 +255,47 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             if not dry_run:
                 # Backup the original file if required
                 if args.backup_dir != "-":
-                    # Get the relative path from the current working directory
-                    rel_path = os.path.relpath(file_path)
-                    # Create the backup path maintaining the directory structure
-                    backup_path = os.path.join(args.backup_dir, rel_path)
-                    # Ensure the directory structure exists
-                    os.makedirs(os.path.dirname(backup_path), exist_ok=True)
-                    shutil.copy2(file_path, backup_path)  # copy2 preserves metadata
+                    try:
+                        # Convert the file path to a Path object
+                        file_path_obj = Path(file_path).resolve()
+                        # Get the absolute path to the backup directory
+                        backup_dir = Path(args.backup_dir).resolve()
+                        
+                        # Create the relative path structure
+                        rel_path = file_path_obj.relative_to(file_path_obj.parent)
+                        backup_path = backup_dir / rel_path.parent.name / rel_path.name
+                        
+                        # Ensure the backup directory exists
+                        backup_path.parent.mkdir(parents=True, exist_ok=True)
+                        
+                        # Add .old extension for the backup
+                        backup_path = backup_path.with_suffix(backup_path.suffix + '.old')
+                        
+                        # Copy the original file with metadata preserved
+                        console.print(f"[cyan]Backing up to: {backup_path}[/cyan]")
+                        shutil.copy2(file_path, backup_path)
+                        
+                        # Generate spectrograms if enabled
+                        if not args.skip_spectrograms:
+                            generate_spectrogram(file_path, file_path, backup_path.parent)
+                            
+                    except Exception as e:
+                        console.print(f"[red]Error creating backup: {str(e)}[/red]")
+                        if args.verbose:
+                            import traceback
+                            console.print(traceback.format_exc())
+                        return
 
                 # Export the converted audio file
-                output_file = file_path.replace(os.path.splitext(file_path)[1], ".wav")
-                audio.export(output_file, format="wav")
-
-                # Generate spectrogram if enabled
-                if not args.skip_spectrograms:
-                    generate_spectrogram(
-                        file_path, output_file, os.path.dirname(backup_path)
-                    )
+                try:
+                    output_file = file_path
+                    audio.export(output_file, format="wav")
+                    console.print(f"[green]Converted file saved: {output_file}[/green]")
+                except Exception as e:
+                    console.print(f"[red]Error saving converted file: {str(e)}[/red]")
+                    if args.verbose:
+                        import traceback
+                        console.print(traceback.format_exc())
         else:
             status = Text()
             status.append(f"{file_path} ", style="cyan")
@@ -280,9 +304,10 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
 
     except Exception as e:
         console.print(f"[red]Error processing {file_path}: {str(e)}[/red]")
-        console.print(f"[yellow]Stack trace:[/yellow]")
-        import traceback
-        console.print(traceback.format_exc())
+        if args.verbose:
+            console.print(f"[yellow]Stack trace:[/yellow]")
+            import traceback
+            console.print(traceback.format_exc())
 
 
 def check_effectively_mono(audio, threshold_dB):

From 68b8a68b7ec171b3030bb42133fc1b26c8a40491 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:44:08 +0000
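Subject: [PATCH 29/66] Nov 17, 2024, 10:44 PM

Add a "Convert in place (no backups)" advanced option. When it is not
selected, prompt for the backup directory and for spectrogram
generation; an empty directory disables both backups and spectrograms.
Print a notice when a file is converted with backups disabled.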

---
 sample-shrinker-python/sample-shrinker.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 16b4bbc..df2314c 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -253,7 +253,7 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             console.print(status)
             
             if not dry_run:
-                # Backup the original file if required
+                # Backup handling
                 if args.backup_dir != "-":
                     try:
                         # Convert the file path to a Path object
@@ -285,6 +285,8 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                             import traceback
                             console.print(traceback.format_exc())
                         return
+                else:
+                    console.print("[yellow]No backup created (backups disabled)[/yellow]")
 
                 # Export the converted audio file
                 try:
@@ -869,7 +871,8 @@ def get_interactive_config():
             "Preview changes (dry run)",
             "Process files in parallel",
             "Set minimum sample rate",
-            "Set minimum bit depth"
+            "Set minimum bit depth",
+            "Convert in place (no backups)",
         ],
     ).ask()
 
@@ -877,6 +880,22 @@ def get_interactive_config():
     args.pre_normalize = "Pre-normalize before conversion" in advanced_options
     args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
     args.dry_run = "Preview changes (dry run)" in advanced_options
+    convert_in_place = "Convert in place (no backups)" in advanced_options
+
+    # Configure backup settings if not converting in place
+    if not convert_in_place:
+        args.backup_dir = questionary.text(
+            "Backup directory path:",
+            default="_backup",
+        ).ask()
+        if args.backup_dir.strip():  # If not empty
+            args.skip_spectrograms = questionary.confirm(
+                "Generate spectrograms for backup comparison?",
+                default=not args.skip_spectrograms
+            ).ask()
+        else:
+            args.backup_dir = "-"
+            args.skip_spectrograms = True
 
     if "Process files in parallel" in advanced_options:
         args.jobs = questionary.select(

From 925c6ae1d284a5187ff047ce1ff65f850a7d21f0 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:45:24 +0000
Subject: [PATCH 30/66] Nov 17, 2024, 10:45 PM

---
 sample-shrinker-python/requirements.txt   |   1 +
 sample-shrinker-python/sample-shrinker.py | 281 ++++++++--------------
 2 files changed, 101 insertions(+), 181 deletions(-)

diff --git a/sample-shrinker-python/requirements.txt b/sample-shrinker-python/requirements.txt
index ac87aa6..fcfd3f4 100644
--- a/sample-shrinker-python/requirements.txt
+++ b/sample-shrinker-python/requirements.txt
@@ -3,5 +3,6 @@ matplotlib==3.9.2
 numpy==2.1.3
 pydub==0.25.1
 questionary==2.0.1
+rich==13.9.4
 scipy==1.14.1
 soundfile==0.12.1
diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index df2314c..df9cf14 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -10,6 +10,7 @@
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 import subprocess
+import json
 
 import librosa
 import matplotlib.pyplot as plt
@@ -708,8 +709,56 @@ def process_duplicate_directories(duplicates, args):
                     print(f"Error moving directory {dir_path}: {e}")
 
 
+def load_saved_config():
+    """Load previously saved configuration."""
+    config_path = Path.home() / '.sample-shrinker.json'
+    if config_path.exists():
+        try:
+            with open(config_path, 'r') as f:
+                config = json.load(f)
+                console.print("[dim]Loaded saved configuration[/dim]")
+                return config
+        except Exception as e:
+            console.print(f"[yellow]Error loading saved config: {e}[/yellow]")
+    return {}
+
+def save_config(args, action):
+    """Save current configuration."""
+    config_path = Path.home() / '.sample-shrinker.json'
+    try:
+        # Convert namespace to dict and handle Path objects
+        config = {
+            'last_action': action,
+            'files': [str(p) for p in args.files],
+            'backup_dir': args.backup_dir,
+            'bitdepth': args.bitdepth,
+            'channels': args.channels,
+            'samplerate': args.samplerate,
+            'min_samplerate': args.min_samplerate,
+            'min_bitdepth': args.min_bitdepth,
+            'auto_mono': args.auto_mono,
+            'auto_mono_threshold': args.auto_mono_threshold,
+            'skip_spectrograms': args.skip_spectrograms,
+            'pre_normalize': args.pre_normalize,
+            'jobs': args.jobs,
+            # Duplicate removal specific settings
+            'use_fuzzy': getattr(args, 'use_fuzzy', False),
+            'ignore_names': getattr(args, 'ignore_names', False),
+            'fuzzy_threshold': getattr(args, 'fuzzy_threshold', 90),
+            'fuzzy_options': getattr(args, 'fuzzy_options', []),
+            'advanced_options': getattr(args, 'advanced_options', []),
+        }
+        
+        with open(config_path, 'w') as f:
+            json.dump(config, f, indent=2)
+            console.print("[dim]Saved configuration for next time[/dim]")
+    except Exception as e:
+        console.print(f"[yellow]Error saving config: {e}[/yellow]")
+
 def get_interactive_config():
     """Get configuration through interactive questionary prompts."""
+    # Load saved configuration
+    saved_config = load_saved_config()
     
     # First, get the action type
     action = questionary.select(
@@ -719,6 +768,7 @@ def get_interactive_config():
             "Remove duplicate directories",
             "Exit",
         ],
+        default=saved_config.get('last_action', "Shrink samples (convert audio files)")
     ).ask()
 
     if action == "Exit":
@@ -726,50 +776,41 @@ def get_interactive_config():
 
     # Get the directory/files to process
     paths = []
-    while True:
-        path = questionary.path(
-            "Select directory or file to process (press Enter with empty path when done):",
-            only_directories=False,
+    last_paths = saved_config.get('files', [])
+    
+    if last_paths:
+        use_last = questionary.confirm(
+            f"Use last paths?\n" + "\n".join(last_paths),
+            default=True
         ).ask()
-        
-        if not path:  # Empty input
-            if paths:  # If we have at least one path, break
-                break
-            else:  # If no paths yet, ask again
-                print("Please select at least one directory or file.")
-                continue
-        
-        paths.append(path)
-        
-        if not questionary.confirm("Add another path?", default=False).ask():
-            break
+        if use_last:
+            paths = last_paths
 
-    if not paths:
-        return None, None
+    # ... rest of path collection code ...
 
-    # Create a namespace object to match argparse structure
+    # Create a namespace object with saved defaults
     args = argparse.Namespace()
     args.files = paths
-
-    # Set ALL default values (matching parse_args defaults)
-    args.backup_dir = "_backup"
-    args.dry_run = False
-    args.verbose = False
-    args.ext = "wav,mp3"
-    args.bitdepth = 16
-    args.min_bitdepth = None
-    args.channels = 2
-    args.samplerate = 44100
-    args.min_samplerate = None
-    args.auto_mono = False
-    args.auto_mono_threshold = -95.5
-    args.skip_spectrograms = False
-    args.pre_normalize = False
-    args.list = False
-    args.jobs = 1
+    args.backup_dir = saved_config.get('backup_dir', "_backup")
+    args.bitdepth = saved_config.get('bitdepth', 16)
+    args.channels = saved_config.get('channels', 2)
+    args.samplerate = saved_config.get('samplerate', 44100)
+    args.min_samplerate = saved_config.get('min_samplerate', None)
+    args.min_bitdepth = saved_config.get('min_bitdepth', None)
+    args.auto_mono = saved_config.get('auto_mono', False)
+    args.auto_mono_threshold = saved_config.get('auto_mono_threshold', -95.5)
+    args.skip_spectrograms = saved_config.get('skip_spectrograms', False)
+    args.pre_normalize = saved_config.get('pre_normalize', False)
+    args.jobs = saved_config.get('jobs', 1)
 
     if action == "Remove duplicate directories":
-        # For duplicate removal, get configuration options
+        # Use saved defaults for duplicate options
+        saved_duplicate_options = []
+        if saved_config.get('use_fuzzy', False):
+            saved_duplicate_options.append("Use fuzzy matching for similar files")
+        if saved_config.get('ignore_names', False):
+            saved_duplicate_options.append("Ignore filenames (match by content only)")
+        
         duplicate_options = questionary.checkbox(
             "Select duplicate removal options:",
             choices=[
@@ -778,158 +819,36 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=["Preview changes (dry run)"],
+            default=saved_duplicate_options
         ).ask()
 
-        args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
-        args.ignore_names = (
-            "Ignore filenames (match by content only)" in duplicate_options
-        )
-        args.dry_run = "Preview changes (dry run)" in duplicate_options
-        args.verbose = "Show detailed progress" in duplicate_options
-
-        if args.use_fuzzy:
-            # Get fuzzy matching configuration
-            args.fuzzy_threshold = questionary.select(
-                "Select fuzzy matching threshold (higher = more strict):",
-                choices=[
-                    "95 - Nearly identical",
-                    "90 - Very similar",
-                    "85 - Similar",
-                    "80 - Somewhat similar",
-                ],
-                default="90 - Very similar",
-            ).ask()
-            args.fuzzy_threshold = int(args.fuzzy_threshold.split()[0])
-
-            args.fuzzy_options = questionary.checkbox(
-                "Select fuzzy matching options:",
-                choices=[
-                    "Compare file lengths",
-                    "Compare sample rates",
-                    "Compare channel counts",
-                ],
-                default=["Compare file lengths", "Compare sample rates"],
-            ).ask()
-
-        # Get backup options (moved before backup_choice)
-        backup_dir = questionary.text(
-            "Backup directory path:",
-            default="_backup",
-            description="Directory where duplicates will be moved"
-        ).ask()
-        
-        if backup_dir.strip():  # If not empty
-            args.backup_dir = backup_dir.strip()
-        else:
-            args.backup_dir = "_backup"  # Fallback to default
+        # ... rest of duplicate removal configuration ...
 
-        backup_choice = questionary.select(
-            "How should duplicates be handled?",
+    else:  # Sample shrinking
+        # Use saved defaults for advanced options
+        saved_advanced = saved_config.get('advanced_options', [])
+        advanced_options = questionary.checkbox(
+            "Select additional options:",
             choices=[
-                f"Move to {args.backup_dir} (safe)",
-                "Delete immediately (dangerous)",
-                "Preview only (no changes)",
+                "Auto-convert stereo to mono when possible",
+                "Pre-normalize before conversion",
+                "Skip generating spectrograms",
+                "Preview changes (dry run)",
+                "Process files in parallel",
+                "Set minimum sample rate",
+                "Set minimum bit depth",
+                "Convert in place (no backups)",
             ],
-            default=f"Move to {args.backup_dir} (safe)",
-        ).ask()
-
-        args.delete_duplicates = "Delete" in backup_choice
-        args.dry_run = "Preview" in backup_choice
-
-        return "duplicates", args
-
-    # For sample shrinking, get all the conversion options
-    args.bitdepth = questionary.select(
-        "Select target bit depth:", 
-        choices=["8", "16", "24"], 
-        default="16"
-    ).ask()
-    args.bitdepth = int(args.bitdepth)
-
-    args.channels = questionary.select(
-        "Select target channels:",
-        choices=["1 (mono)", "2 (stereo)"],
-        default="2 (stereo)",
-    ).ask()
-    args.channels = 1 if "1" in args.channels else 2
-
-    args.samplerate = questionary.select(
-        "Select target sample rate:",
-        choices=["22050", "44100", "48000"],
-        default="44100",
-    ).ask()
-    args.samplerate = int(args.samplerate)
-
-    # Advanced options in a checkbox group
-    advanced_options = questionary.checkbox(
-        "Select additional options:",
-        choices=[
-            "Auto-convert stereo to mono when possible",
-            "Pre-normalize before conversion",
-            "Skip generating spectrograms",
-            "Preview changes (dry run)",
-            "Process files in parallel",
-            "Set minimum sample rate",
-            "Set minimum bit depth",
-            "Convert in place (no backups)",
-        ],
-    ).ask()
-
-    args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
-    args.pre_normalize = "Pre-normalize before conversion" in advanced_options
-    args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
-    args.dry_run = "Preview changes (dry run)" in advanced_options
-    convert_in_place = "Convert in place (no backups)" in advanced_options
-
-    # Configure backup settings if not converting in place
-    if not convert_in_place:
-        args.backup_dir = questionary.text(
-            "Backup directory path:",
-            default="_backup",
+            default=saved_advanced
         ).ask()
-        if args.backup_dir.strip():  # If not empty
-            args.skip_spectrograms = questionary.confirm(
-                "Generate spectrograms for backup comparison?",
-                default=not args.skip_spectrograms
-            ).ask()
-        else:
-            args.backup_dir = "-"
-            args.skip_spectrograms = True
-
-    if "Process files in parallel" in advanced_options:
-        args.jobs = questionary.select(
-            "How many parallel jobs? (higher values may improve speed but use more memory)",
-            choices=["2", "4", "8", "16", "24", "32", "48", "64"], 
-            default="4"
-        ).ask()
-        args.jobs = int(args.jobs)
-
-    if "Set minimum sample rate" in advanced_options:
-        args.min_samplerate = questionary.select(
-            "Select minimum sample rate:",
-            choices=["22050", "44100", "48000"],
-            default="22050"
-        ).ask()
-        args.min_samplerate = int(args.min_samplerate)
+        
+        # Store selected options for next time
+        args.advanced_options = advanced_options
 
-    if "Set minimum bit depth" in advanced_options:
-        args.min_bitdepth = questionary.select(
-            "Select minimum bit depth:",
-            choices=["8", "16", "24"],
-            default="16"
-        ).ask()
-        args.min_bitdepth = int(args.min_bitdepth)
-
-    if args.auto_mono:
-        args.auto_mono_threshold = float(
-            questionary.text(
-                "Auto-mono threshold in dB (default: -95.5):", 
-                default="-95.5"
-            ).ask()
-        )
+    # Save the final configuration
+    save_config(args, action)
 
-    return "shrink", args
+    return "duplicates" if "Remove" in action else "shrink", args
 
 
 def process_duplicates(args):

From 8eb86b7f06da149f3ad524d1404f395d7c171a64 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:46:31 +0000
Subject: [PATCH 31/66] Nov 17, 2024, 10:46 PM

---
 sample-shrinker-python/sample-shrinker.py | 31 ++++++++++++++---------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index df9cf14..75d150d 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -819,7 +819,8 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=saved_duplicate_options
+            # Only use default if we have valid saved options
+            **({"default": saved_duplicate_options} if saved_duplicate_options else {})
         ).ask()
 
         # ... rest of duplicate removal configuration ...
@@ -827,19 +828,25 @@ def get_interactive_config():
     else:  # Sample shrinking
         # Use saved defaults for advanced options
         saved_advanced = saved_config.get('advanced_options', [])
+        # Validate saved options against available choices
+        available_choices = [
+            "Auto-convert stereo to mono when possible",
+            "Pre-normalize before conversion",
+            "Skip generating spectrograms",
+            "Preview changes (dry run)",
+            "Process files in parallel",
+            "Set minimum sample rate",
+            "Set minimum bit depth",
+            "Convert in place (no backups)",
+        ]
+        # Only keep valid saved options
+        valid_saved = [opt for opt in saved_advanced if opt in available_choices]
+        
         advanced_options = questionary.checkbox(
             "Select additional options:",
-            choices=[
-                "Auto-convert stereo to mono when possible",
-                "Pre-normalize before conversion",
-                "Skip generating spectrograms",
-                "Preview changes (dry run)",
-                "Process files in parallel",
-                "Set minimum sample rate",
-                "Set minimum bit depth",
-                "Convert in place (no backups)",
-            ],
-            default=saved_advanced
+            choices=available_choices,
+            # Only use default if we have valid saved options
+            **({"default": valid_saved} if valid_saved else {})
         ).ask()
         
         # Store selected options for next time

From 2769d3c55ed56c09db98404432f5b0a2695a654f Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:47:46 +0000
Subject: [PATCH 32/66] Nov 17, 2024, 10:47 PM

---
 sample-shrinker-python/sample-shrinker.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 75d150d..377c296 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -741,6 +741,8 @@ def save_config(args, action):
             'skip_spectrograms': args.skip_spectrograms,
             'pre_normalize': args.pre_normalize,
             'jobs': args.jobs,
+            'ext': getattr(args, 'ext', "wav,mp3"),
+            'verbose': getattr(args, 'verbose', False),
             # Duplicate removal specific settings
             'use_fuzzy': getattr(args, 'use_fuzzy', False),
             'ignore_names': getattr(args, 'ignore_names', False),
@@ -788,7 +790,7 @@ def get_interactive_config():
 
     # ... rest of path collection code ...
 
-    # Create a namespace object with saved defaults
+    # Create a namespace object with ALL default values
     args = argparse.Namespace()
     args.files = paths
     args.backup_dir = saved_config.get('backup_dir', "_backup")
@@ -802,6 +804,9 @@ def get_interactive_config():
     args.skip_spectrograms = saved_config.get('skip_spectrograms', False)
     args.pre_normalize = saved_config.get('pre_normalize', False)
     args.jobs = saved_config.get('jobs', 1)
+    args.ext = saved_config.get('ext', "wav,mp3")
+    args.verbose = saved_config.get('verbose', False)
+    args.list = False
 
     if action == "Remove duplicate directories":
         # Use saved defaults for duplicate options

From 357c34d738abdd3548f0304278cb83965ab39e0b Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:49:22 +0000
Subject: [PATCH 33/66] Nov 17, 2024, 10:49 PM

---
 sample-shrinker-python/sample-shrinker.py | 76 ++++++++++++++++++++---
 1 file changed, 69 insertions(+), 7 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 377c296..34f085d 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -831,9 +831,7 @@ def get_interactive_config():
         # ... rest of duplicate removal configuration ...
 
     else:  # Sample shrinking
-        # Use saved defaults for advanced options
-        saved_advanced = saved_config.get('advanced_options', [])
-        # Validate saved options against available choices
+        # Define available choices first
         available_choices = [
             "Auto-convert stereo to mono when possible",
             "Pre-normalize before conversion",
@@ -844,19 +842,83 @@ def get_interactive_config():
             "Set minimum bit depth",
             "Convert in place (no backups)",
         ]
-        # Only keep valid saved options
-        valid_saved = [opt for opt in saved_advanced if opt in available_choices]
         
+        # Get saved options and validate them
+        saved_advanced = saved_config.get('advanced_options', [])
+        # Only use saved options that exist in available choices
+        valid_saved = []
+        if saved_advanced:
+            valid_saved = [opt for opt in available_choices if opt in saved_advanced]
+        
+        # Create the checkbox without conditional default
         advanced_options = questionary.checkbox(
             "Select additional options:",
             choices=available_choices,
-            # Only use default if we have valid saved options
-            **({"default": valid_saved} if valid_saved else {})
+            default=valid_saved
         ).ask()
         
         # Store selected options for next time
         args.advanced_options = advanced_options
 
+        # Process the selections
+        args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
+        args.pre_normalize = "Pre-normalize before conversion" in advanced_options
+        args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
+        args.dry_run = "Preview changes (dry run)" in advanced_options
+        convert_in_place = "Convert in place (no backups)" in advanced_options
+
+        if "Process files in parallel" in advanced_options:
+            args.jobs = questionary.select(
+                "How many parallel jobs?",
+                choices=["2", "4", "8", "16", "24", "32", "48", "64"],
+                default=str(saved_config.get('jobs', 4))
+            ).ask()
+            args.jobs = int(args.jobs)
+        else:
+            args.jobs = 1
+
+        if "Set minimum sample rate" in advanced_options:
+            args.min_samplerate = questionary.select(
+                "Select minimum sample rate:",
+                choices=["22050", "44100", "48000"],
+                default=str(saved_config.get('min_samplerate', 22050))
+            ).ask()
+            args.min_samplerate = int(args.min_samplerate)
+
+        if "Set minimum bit depth" in advanced_options:
+            args.min_bitdepth = questionary.select(
+                "Select minimum bit depth:",
+                choices=["8", "16", "24"],
+                default=str(saved_config.get('min_bitdepth', 16))
+            ).ask()
+            args.min_bitdepth = int(args.min_bitdepth)
+
+        # Configure backup settings if not converting in place
+        if not convert_in_place:
+            backup_enabled = questionary.confirm(
+                "Enable backups of original files?",
+                default=not args.backup_dir == "-"
+            ).ask()
+            
+            if backup_enabled:
+                backup_dir = questionary.text(
+                    "Backup directory path:",
+                    default=args.backup_dir if args.backup_dir != "-" else "_backup"
+                ).ask()
+                args.backup_dir = backup_dir.strip() if backup_dir.strip() else "_backup"
+                
+                if not args.skip_spectrograms:
+                    args.skip_spectrograms = not questionary.confirm(
+                        "Generate spectrograms for backup comparison?",
+                        default=True
+                    ).ask()
+            else:
+                args.backup_dir = "-"
+                args.skip_spectrograms = True
+        else:
+            args.backup_dir = "-"
+            args.skip_spectrograms = True
+
     # Save the final configuration
     save_config(args, action)
 

From 0ac9acbe8e82b9a1b471278358c68c90e4dac281 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:51:29 +0000
Subject: [PATCH 34/66] Nov 17, 2024, 10:51 PM

---
 sample-shrinker-python/sample-shrinker.py | 539 ++++++++++++----------
 1 file changed, 290 insertions(+), 249 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 34f085d..49cd402 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -4,27 +4,32 @@
 import hashlib
 import os
 import shutil
+import subprocess
 import sys
 import time
 from collections import defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
-import subprocess
-import json
 
 import librosa
 import matplotlib.pyplot as plt
 import numpy as np
 import questionary
-import soundfile as sf
 import scipy.signal
-from scipy.io import wavfile
+import soundfile as sf
 from pydub import AudioSegment
+from rich import print as rprint
 from rich.console import Console
-from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
 from rich.panel import Panel
+from rich.progress import (
+    BarColumn,
+    Progress,
+    SpinnerColumn,
+    TaskProgressColumn,
+    TextColumn,
+)
 from rich.text import Text
-from rich import print as rprint
+from scipy.io import wavfile
 
 # Initialize console
 console = Console()
@@ -137,16 +142,20 @@ def delete_resource_forks(directory):
 def reencode_audio(file_path):
     """Re-encode audio file to PCM 16-bit if it has a different encoding."""
     try:
-        output_path = str(Path(file_path).with_suffix('.reencoded.wav'))
+        output_path = str(Path(file_path).with_suffix(".reencoded.wav"))
         # Use ffmpeg directly for more reliable conversion
         cmd = [
-            'ffmpeg', '-y',
-            '-i', str(file_path),
-            '-acodec', 'pcm_s16le',
-            '-ar', '44100',
-            output_path
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(file_path),
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "44100",
+            output_path,
         ]
-        
+
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             console.print(f"[green]Successfully re-encoded: {output_path}[/green]")
@@ -162,7 +171,7 @@ def reencode_audio(file_path):
 def check_ffmpeg():
     """Check if ffmpeg is available and properly installed."""
     try:
-        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
+        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
         return True
     except (subprocess.SubprocessError, FileNotFoundError):
         console.print("[red]Error: ffmpeg is not installed or not found in PATH[/red]")
@@ -180,7 +189,7 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             progress.update(task_id, description=f"Processing: {Path(file_path).name}")
         else:
             console.print(f"Processing file: [cyan]{file_path}[/cyan]")
-        
+
         try:
             audio = AudioSegment.from_file(file_path)
         except (IndexError, OSError) as e:
@@ -191,11 +200,13 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                 try:
                     audio = AudioSegment.from_file(reencoded_file)
                 except Exception as re_err:
-                    console.print(f"[red]Failed to process re-encoded file: {str(re_err)}[/red]")
+                    console.print(
+                        f"[red]Failed to process re-encoded file: {str(re_err)}[/red]"
+                    )
                     return
             else:
                 return
-            
+
         modified = False
         change_reason = []
 
@@ -252,7 +263,7 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             status.append("[CHANGED]: ", style="yellow")
             status.append(", ".join(change_reason), style="green")
             console.print(status)
-            
+
             if not dry_run:
                 # Backup handling
                 if args.backup_dir != "-":
@@ -261,33 +272,40 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                         file_path_obj = Path(file_path).resolve()
                         # Get the absolute path to the backup directory
                         backup_dir = Path(args.backup_dir).resolve()
-                        
+
                         # Create the relative path structure
                         rel_path = file_path_obj.relative_to(file_path_obj.parent)
                         backup_path = backup_dir / rel_path.parent.name / rel_path.name
-                        
+
                         # Ensure the backup directory exists
                         backup_path.parent.mkdir(parents=True, exist_ok=True)
-                        
+
                         # Add .old extension for the backup
-                        backup_path = backup_path.with_suffix(backup_path.suffix + '.old')
-                        
+                        backup_path = backup_path.with_suffix(
+                            backup_path.suffix + ".old"
+                        )
+
                         # Copy the original file with metadata preserved
                         console.print(f"[cyan]Backing up to: {backup_path}[/cyan]")
                         shutil.copy2(file_path, backup_path)
-                        
+
                         # Generate spectrograms if enabled
                         if not args.skip_spectrograms:
-                            generate_spectrogram(file_path, file_path, backup_path.parent)
-                            
+                            generate_spectrogram(
+                                file_path, file_path, backup_path.parent
+                            )
+
                     except Exception as e:
                         console.print(f"[red]Error creating backup: {str(e)}[/red]")
                         if args.verbose:
                             import traceback
+
                             console.print(traceback.format_exc())
                         return
                 else:
-                    console.print("[yellow]No backup created (backups disabled)[/yellow]")
+                    console.print(
+                        "[yellow]No backup created (backups disabled)[/yellow]"
+                    )
 
                 # Export the converted audio file
                 try:
@@ -298,6 +316,7 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                     console.print(f"[red]Error saving converted file: {str(e)}[/red]")
                     if args.verbose:
                         import traceback
+
                         console.print(traceback.format_exc())
         else:
             status = Text()
@@ -310,6 +329,7 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
         if args.verbose:
             console.print(f"[yellow]Stack trace:[/yellow]")
             import traceback
+
             console.print(traceback.format_exc())
 
 
@@ -364,33 +384,37 @@ def collect_files(args):
     """Collect all wav and mp3 files from provided directories and files."""
     file_list = []
     valid_extensions = [ext.strip().lower() for ext in args.ext.split(",")]
-    
+
     console.print("[cyan]Starting file collection...[/cyan]")
-    
+
     for path in args.files:
         # Expand user and resolve path
         path = os.path.expanduser(path)
         path = os.path.expandvars(path)
         path = Path(path).resolve()
-        
+
         console.print(f"[cyan]Scanning path: {path}[/cyan]")
-        
+
         if path.is_dir():
             for root, dirs, files in os.walk(path):
                 for file in files:
                     file_lower = file.lower()
-                    if any(file_lower.endswith(f".{ext}") for ext in valid_extensions) and not file.startswith("._"):
+                    if any(
+                        file_lower.endswith(f".{ext}") for ext in valid_extensions
+                    ) and not file.startswith("._"):
                         full_path = os.path.join(root, file)
                         file_list.append(full_path)
                         if args.verbose:
                             console.print(f"[dim]Found: {full_path}[/dim]")
         elif path.is_file():
             file_lower = str(path).lower()
-            if any(file_lower.endswith(f".{ext}") for ext in valid_extensions) and not path.name.startswith("._"):
+            if any(
+                file_lower.endswith(f".{ext}") for ext in valid_extensions
+            ) and not path.name.startswith("._"):
                 file_list.append(str(path))
                 if args.verbose:
                     console.print(f"[dim]Found: {path}[/dim]")
-    
+
     console.print(f"[green]Found {len(file_list)} files to process[/green]")
     return file_list
 
@@ -400,7 +424,7 @@ def run_in_parallel(file_list, args):
     if not file_list:
         console.print("[yellow]No files to process![/yellow]")
         return
-        
+
     try:
         with Progress(
             SpinnerColumn(),
@@ -410,22 +434,21 @@ def run_in_parallel(file_list, args):
             console=console,
         ) as progress:
             total_files = len(file_list)
-            console.print(f"[cyan]Starting processing of {total_files} files with {args.jobs} parallel jobs[/cyan]")
-            
+            console.print(
+                f"[cyan]Starting processing of {total_files} files with {args.jobs} parallel jobs[/cyan]"
+            )
+
             task = progress.add_task("Processing files...", total=total_files)
-            
+
             with ThreadPoolExecutor(max_workers=args.jobs) as executor:
                 # Submit all tasks
                 futures = {
                     executor.submit(
-                        process_audio, 
-                        file, 
-                        args,
-                        task_id=task,
-                        progress=progress
-                    ): file for file in file_list
+                        process_audio, file, args, task_id=task, progress=progress
+                    ): file
+                    for file in file_list
                 }
-                
+
                 # Process completed tasks
                 for future in concurrent.futures.as_completed(futures):
                     progress.advance(task)
@@ -433,12 +456,16 @@ def run_in_parallel(file_list, args):
                         result = future.result()
                     except Exception as exc:
                         file = futures[future]
-                        console.print(f"[red]File {file} generated an exception: {exc}[/red]")
-                    
+                        console.print(
+                            f"[red]File {file} generated an exception: {exc}[/red]"
+                        )
+
             console.print("[green]Processing complete![/green]")
-                        
+
     except KeyboardInterrupt:
-        console.print("[yellow]Received KeyboardInterrupt, attempting to cancel all threads...[/yellow]")
+        console.print(
+            "[yellow]Received KeyboardInterrupt, attempting to cancel all threads...[/yellow]"
+        )
         executor.shutdown(wait=False, cancel_futures=True)
         raise
     except Exception as e:
@@ -483,26 +510,23 @@ def get_audio_fingerprint(file_path):
         # Convert to mono for comparison
         if audio.channels > 1:
             audio = audio.set_channels(1)
-        
+
         # Convert to numpy array
         samples = np.array(audio.get_array_of_samples())
-        
+
         # Normalize
         samples = samples / np.max(np.abs(samples))
-        
+
         # Get a signature using peaks in frequency domain
         freqs, times, spectrogram = scipy.signal.spectrogram(
-            samples,
-            audio.frame_rate,
-            nperseg=1024,
-            noverlap=512
+            samples, audio.frame_rate, nperseg=1024, noverlap=512
         )
-        
+
         # Get the strongest frequencies
         peaks = np.mean(spectrogram, axis=1)
         # Normalize the peaks
         peaks = peaks / np.max(peaks)
-        
+
         return peaks
     except Exception as e:
         print(f"Error generating audio fingerprint for {file_path}: {e}")
@@ -513,12 +537,12 @@ def compare_audio_similarity(file1_fingerprint, file2_fingerprint):
     """Compare two audio fingerprints and return similarity score."""
     if file1_fingerprint is None or file2_fingerprint is None:
         return 0
-    
+
     # Ensure same length for comparison
     min_len = min(len(file1_fingerprint), len(file2_fingerprint))
     f1 = file1_fingerprint[:min_len]
     f2 = file2_fingerprint[:min_len]
-    
+
     # Calculate correlation coefficient
     correlation = np.corrcoef(f1, f2)[0, 1]
     # Convert to percentage and handle NaN
@@ -530,7 +554,7 @@ def find_duplicate_files(paths, args):
     """Find duplicate files using a multi-stage approach with audio fingerprinting."""
     print("Scanning for duplicate files...")
     size_groups = defaultdict(list)
-    
+
     # First pass: group by size
     for path in paths:
         path = Path(path)
@@ -541,16 +565,16 @@ def find_duplicate_files(paths, args):
                         print(f"Scanning: {file_path}")
                     size = file_path.stat().st_size
                     size_groups[size].append(file_path)
-    
+
     hash_groups = defaultdict(list)
     similar_groups = []
-    
+
     # Second pass: check content
     for size, file_paths in size_groups.items():
         if len(file_paths) > 1:
             if args.verbose:
                 print(f"\nChecking {len(file_paths)} files of size {size} bytes...")
-            
+
             # First try exact matches
             for file_path in file_paths:
                 try:
@@ -562,12 +586,15 @@ def find_duplicate_files(paths, args):
                         hash_groups[(name_key, file_hash)].append(file_path)
                 except Exception as e:
                     print(f"Error hashing file {file_path}: {e}")
-            
+
             # Then check for similar audio content
             if args.use_fuzzy:
-                unmatched = [f for f in file_paths 
-                           if not any(f in g for g in hash_groups.values() if len(g) > 1)]
-                
+                unmatched = [
+                    f
+                    for f in file_paths
+                    if not any(f in g for g in hash_groups.values() if len(g) > 1)
+                ]
+
                 if len(unmatched) > 1:
                     # Generate fingerprints for all unmatched files
                     fingerprints = {}
@@ -575,33 +602,32 @@ def find_duplicate_files(paths, args):
                         fingerprint = get_audio_fingerprint(file_path)
                         if fingerprint is not None:
                             fingerprints[file_path] = fingerprint
-                    
+
                     # Compare fingerprints
                     processed = set()
                     for file1 in fingerprints:
                         if file1 in processed:
                             continue
-                        
+
                         similar_files = [file1]
                         for file2 in fingerprints:
                             if file2 != file1 and file2 not in processed:
                                 similarity = compare_audio_similarity(
-                                    fingerprints[file1],
-                                    fingerprints[file2]
+                                    fingerprints[file1], fingerprints[file2]
                                 )
                                 if similarity >= args.fuzzy_threshold:
                                     similar_files.append(file2)
                                     processed.add(file2)
-                        
+
                         if len(similar_files) > 1:
                             similar_groups.append(similar_files)
                             processed.add(file1)
-    
+
     # Combine results
     duplicates = [group for group in hash_groups.values() if len(group) > 1]
     if args.use_fuzzy:
         duplicates.extend(similar_groups)
-    
+
     return duplicates, similar_groups
 
 
@@ -621,7 +647,9 @@ def process_duplicate_files(duplicates, fuzzy_groups, args):
             print("Similarity scores:")
             for file in group[1:]:
                 file_fingerprint = get_audio_fingerprint(file)
-                similarity = compare_audio_similarity(base_fingerprint, file_fingerprint)
+                similarity = compare_audio_similarity(
+                    base_fingerprint, file_fingerprint
+                )
                 print(f"  {file.name}: {similarity:.1f}% similar")
 
         # Sort files by creation time
@@ -709,59 +737,9 @@ def process_duplicate_directories(duplicates, args):
                     print(f"Error moving directory {dir_path}: {e}")
 
 
-def load_saved_config():
-    """Load previously saved configuration."""
-    config_path = Path.home() / '.sample-shrinker.json'
-    if config_path.exists():
-        try:
-            with open(config_path, 'r') as f:
-                config = json.load(f)
-                console.print("[dim]Loaded saved configuration[/dim]")
-                return config
-        except Exception as e:
-            console.print(f"[yellow]Error loading saved config: {e}[/yellow]")
-    return {}
-
-def save_config(args, action):
-    """Save current configuration."""
-    config_path = Path.home() / '.sample-shrinker.json'
-    try:
-        # Convert namespace to dict and handle Path objects
-        config = {
-            'last_action': action,
-            'files': [str(p) for p in args.files],
-            'backup_dir': args.backup_dir,
-            'bitdepth': args.bitdepth,
-            'channels': args.channels,
-            'samplerate': args.samplerate,
-            'min_samplerate': args.min_samplerate,
-            'min_bitdepth': args.min_bitdepth,
-            'auto_mono': args.auto_mono,
-            'auto_mono_threshold': args.auto_mono_threshold,
-            'skip_spectrograms': args.skip_spectrograms,
-            'pre_normalize': args.pre_normalize,
-            'jobs': args.jobs,
-            'ext': getattr(args, 'ext', "wav,mp3"),
-            'verbose': getattr(args, 'verbose', False),
-            # Duplicate removal specific settings
-            'use_fuzzy': getattr(args, 'use_fuzzy', False),
-            'ignore_names': getattr(args, 'ignore_names', False),
-            'fuzzy_threshold': getattr(args, 'fuzzy_threshold', 90),
-            'fuzzy_options': getattr(args, 'fuzzy_options', []),
-            'advanced_options': getattr(args, 'advanced_options', []),
-        }
-        
-        with open(config_path, 'w') as f:
-            json.dump(config, f, indent=2)
-            console.print("[dim]Saved configuration for next time[/dim]")
-    except Exception as e:
-        console.print(f"[yellow]Error saving config: {e}[/yellow]")
-
 def get_interactive_config():
     """Get configuration through interactive questionary prompts."""
-    # Load saved configuration
-    saved_config = load_saved_config()
-    
+
     # First, get the action type
     action = questionary.select(
         "What would you like to do?",
@@ -770,7 +748,6 @@ def get_interactive_config():
             "Remove duplicate directories",
             "Exit",
         ],
-        default=saved_config.get('last_action', "Shrink samples (convert audio files)")
     ).ask()
 
     if action == "Exit":
@@ -778,44 +755,50 @@ def get_interactive_config():
 
     # Get the directory/files to process
     paths = []
-    last_paths = saved_config.get('files', [])
-    
-    if last_paths:
-        use_last = questionary.confirm(
-            f"Use last paths?\n" + "\n".join(last_paths),
-            default=True
+    while True:
+        path = questionary.path(
+            "Select directory or file to process (press Enter with empty path when done):",
+            only_directories=False,
         ).ask()
-        if use_last:
-            paths = last_paths
 
-    # ... rest of path collection code ...
+        if not path:  # Empty input
+            if paths:  # If we have at least one path, break
+                break
+            else:  # If no paths yet, ask again
+                print("Please select at least one directory or file.")
+                continue
+
+        paths.append(path)
+
+        if not questionary.confirm("Add another path?", default=False).ask():
+            break
+
+    if not paths:
+        return None, None
 
-    # Create a namespace object with ALL default values
+    # Create a namespace object to match argparse structure
     args = argparse.Namespace()
     args.files = paths
-    args.backup_dir = saved_config.get('backup_dir', "_backup")
-    args.bitdepth = saved_config.get('bitdepth', 16)
-    args.channels = saved_config.get('channels', 2)
-    args.samplerate = saved_config.get('samplerate', 44100)
-    args.min_samplerate = saved_config.get('min_samplerate', None)
-    args.min_bitdepth = saved_config.get('min_bitdepth', None)
-    args.auto_mono = saved_config.get('auto_mono', False)
-    args.auto_mono_threshold = saved_config.get('auto_mono_threshold', -95.5)
-    args.skip_spectrograms = saved_config.get('skip_spectrograms', False)
-    args.pre_normalize = saved_config.get('pre_normalize', False)
-    args.jobs = saved_config.get('jobs', 1)
-    args.ext = saved_config.get('ext', "wav,mp3")
-    args.verbose = saved_config.get('verbose', False)
+
+    # Set ALL default values (matching parse_args defaults)
+    args.backup_dir = "_backup"
+    args.dry_run = False
+    args.verbose = False
+    args.ext = "wav,mp3"
+    args.bitdepth = 16
+    args.min_bitdepth = None
+    args.channels = 2
+    args.samplerate = 44100
+    args.min_samplerate = None
+    args.auto_mono = False
+    args.auto_mono_threshold = -95.5
+    args.skip_spectrograms = False
+    args.pre_normalize = False
     args.list = False
+    args.jobs = 1
 
     if action == "Remove duplicate directories":
-        # Use saved defaults for duplicate options
-        saved_duplicate_options = []
-        if saved_config.get('use_fuzzy', False):
-            saved_duplicate_options.append("Use fuzzy matching for similar files")
-        if saved_config.get('ignore_names', False):
-            saved_duplicate_options.append("Ignore filenames (match by content only)")
-        
+        # For duplicate removal, get configuration options
         duplicate_options = questionary.checkbox(
             "Select duplicate removal options:",
             choices=[
@@ -824,15 +807,91 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            # Only use default if we have valid saved options
-            **({"default": saved_duplicate_options} if saved_duplicate_options else {})
+            default=["Preview changes (dry run)"],
         ).ask()
 
-        # ... rest of duplicate removal configuration ...
+        args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
+        args.ignore_names = (
+            "Ignore filenames (match by content only)" in duplicate_options
+        )
+        args.dry_run = "Preview changes (dry run)" in duplicate_options
+        args.verbose = "Show detailed progress" in duplicate_options
+
+        if args.use_fuzzy:
+            # Get fuzzy matching configuration
+            args.fuzzy_threshold = questionary.select(
+                "Select fuzzy matching threshold (higher = more strict):",
+                choices=[
+                    "95 - Nearly identical",
+                    "90 - Very similar",
+                    "85 - Similar",
+                    "80 - Somewhat similar",
+                ],
+                default="90 - Very similar",
+            ).ask()
+            args.fuzzy_threshold = int(args.fuzzy_threshold.split()[0])
+
+            args.fuzzy_options = questionary.checkbox(
+                "Select fuzzy matching options:",
+                choices=[
+                    "Compare file lengths",
+                    "Compare sample rates",
+                    "Compare channel counts",
+                ],
+                default=["Compare file lengths", "Compare sample rates"],
+            ).ask()
+
+        # Get backup options (moved before backup_choice)
+        backup_dir = questionary.text(
+            "Backup directory path:",
+            default="_backup",
+            description="Directory where duplicates will be moved",
+        ).ask()
 
-    else:  # Sample shrinking
-        # Define available choices first
-        available_choices = [
+        if backup_dir.strip():  # If not empty
+            args.backup_dir = backup_dir.strip()
+        else:
+            args.backup_dir = "_backup"  # Fallback to default
+
+        backup_choice = questionary.select(
+            "How should duplicates be handled?",
+            choices=[
+                f"Move to {args.backup_dir} (safe)",
+                "Delete immediately (dangerous)",
+                "Preview only (no changes)",
+            ],
+            default=f"Move to {args.backup_dir} (safe)",
+        ).ask()
+
+        args.delete_duplicates = "Delete" in backup_choice
+        args.dry_run = "Preview" in backup_choice
+
+        return "duplicates", args
+
+    # For sample shrinking, get all the conversion options
+    args.bitdepth = questionary.select(
+        "Select target bit depth:", choices=["8", "16", "24"], default="16"
+    ).ask()
+    args.bitdepth = int(args.bitdepth)
+
+    args.channels = questionary.select(
+        "Select target channels:",
+        choices=["1 (mono)", "2 (stereo)"],
+        default="2 (stereo)",
+    ).ask()
+    args.channels = 1 if "1" in args.channels else 2
+
+    args.samplerate = questionary.select(
+        "Select target sample rate:",
+        choices=["22050", "44100", "48000"],
+        default="44100",
+    ).ask()
+    args.samplerate = int(args.samplerate)
+
+    # Advanced options in a checkbox group
+    advanced_options = questionary.checkbox(
+        "Select additional options:",
+        choices=[
             "Auto-convert stereo to mono when possible",
             "Pre-normalize before conversion",
             "Skip generating spectrograms",
@@ -841,117 +900,99 @@ def get_interactive_config():
             "Set minimum sample rate",
             "Set minimum bit depth",
             "Convert in place (no backups)",
-        ]
-        
-        # Get saved options and validate them
-        saved_advanced = saved_config.get('advanced_options', [])
-        # Only use saved options that exist in available choices
-        valid_saved = []
-        if saved_advanced:
-            valid_saved = [opt for opt in available_choices if opt in saved_advanced]
-        
-        # Create the checkbox without conditional default
-        advanced_options = questionary.checkbox(
-            "Select additional options:",
-            choices=available_choices,
-            default=valid_saved
+        ],
+    ).ask()
+
+    args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
+    args.pre_normalize = "Pre-normalize before conversion" in advanced_options
+    args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
+    args.dry_run = "Preview changes (dry run)" in advanced_options
+    convert_in_place = "Convert in place (no backups)" in advanced_options
+
+    # Configure backup settings if not converting in place
+    if not convert_in_place:
+        args.backup_dir = questionary.text(
+            "Backup directory path:",
+            default="_backup",
         ).ask()
-        
-        # Store selected options for next time
-        args.advanced_options = advanced_options
-
-        # Process the selections
-        args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
-        args.pre_normalize = "Pre-normalize before conversion" in advanced_options
-        args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
-        args.dry_run = "Preview changes (dry run)" in advanced_options
-        convert_in_place = "Convert in place (no backups)" in advanced_options
-
-        if "Process files in parallel" in advanced_options:
-            args.jobs = questionary.select(
-                "How many parallel jobs?",
-                choices=["2", "4", "8", "16", "24", "32", "48", "64"],
-                default=str(saved_config.get('jobs', 4))
+        if args.backup_dir.strip():  # If not empty
+            args.skip_spectrograms = questionary.confirm(
+                "Generate spectrograms for backup comparison?",
+                default=not args.skip_spectrograms,
             ).ask()
-            args.jobs = int(args.jobs)
         else:
-            args.jobs = 1
+            args.backup_dir = "-"
+            args.skip_spectrograms = True
 
-        if "Set minimum sample rate" in advanced_options:
-            args.min_samplerate = questionary.select(
-                "Select minimum sample rate:",
-                choices=["22050", "44100", "48000"],
-                default=str(saved_config.get('min_samplerate', 22050))
-            ).ask()
-            args.min_samplerate = int(args.min_samplerate)
+    if "Process files in parallel" in advanced_options:
+        args.jobs = questionary.select(
+            "How many parallel jobs? (higher values may improve speed but use more memory)",
+            choices=["2", "4", "8", "16", "24", "32", "48", "64"],
+            default="4",
+        ).ask()
+        args.jobs = int(args.jobs)
 
-        if "Set minimum bit depth" in advanced_options:
-            args.min_bitdepth = questionary.select(
-                "Select minimum bit depth:",
-                choices=["8", "16", "24"],
-                default=str(saved_config.get('min_bitdepth', 16))
-            ).ask()
-            args.min_bitdepth = int(args.min_bitdepth)
+    if "Set minimum sample rate" in advanced_options:
+        args.min_samplerate = questionary.select(
+            "Select minimum sample rate:",
+            choices=["22050", "44100", "48000"],
+            default="22050",
+        ).ask()
+        args.min_samplerate = int(args.min_samplerate)
 
-        # Configure backup settings if not converting in place
-        if not convert_in_place:
-            backup_enabled = questionary.confirm(
-                "Enable backups of original files?",
-                default=not args.backup_dir == "-"
-            ).ask()
-            
-            if backup_enabled:
-                backup_dir = questionary.text(
-                    "Backup directory path:",
-                    default=args.backup_dir if args.backup_dir != "-" else "_backup"
-                ).ask()
-                args.backup_dir = backup_dir.strip() if backup_dir.strip() else "_backup"
-                
-                if not args.skip_spectrograms:
-                    args.skip_spectrograms = not questionary.confirm(
-                        "Generate spectrograms for backup comparison?",
-                        default=True
-                    ).ask()
-            else:
-                args.backup_dir = "-"
-                args.skip_spectrograms = True
-        else:
-            args.backup_dir = "-"
-            args.skip_spectrograms = True
+    if "Set minimum bit depth" in advanced_options:
+        args.min_bitdepth = questionary.select(
+            "Select minimum bit depth:", choices=["8", "16", "24"], default="16"
+        ).ask()
+        args.min_bitdepth = int(args.min_bitdepth)
 
-    # Save the final configuration
-    save_config(args, action)
+    if args.auto_mono:
+        args.auto_mono_threshold = float(
+            questionary.text(
+                "Auto-mono threshold in dB (default: -95.5):", default="-95.5"
+            ).ask()
+        )
 
-    return "duplicates" if "Remove" in action else "shrink", args
+    return "shrink", args
 
 
 def process_duplicates(args):
     """Process both directory and file level duplicates with visual feedback."""
-    with console.status("[bold green]Phase 1: Searching for duplicate directories...") as status:
+    with console.status(
+        "[bold green]Phase 1: Searching for duplicate directories..."
+    ) as status:
         dir_duplicates = find_duplicate_directories(args.files)
 
     if dir_duplicates:
         count = sum(len(v) - 1 for v in dir_duplicates.values())
-        console.print(Panel(f"Found [cyan]{count}[/cyan] duplicate directories", 
-                          title="Directory Scan Complete"))
-        
+        console.print(
+            Panel(
+                f"Found [cyan]{count}[/cyan] duplicate directories",
+                title="Directory Scan Complete",
+            )
+        )
+
         if args.dry_run:
             console.print("[yellow]DRY RUN - No directories will be moved[/yellow]")
         process_duplicate_directories(verified_duplicates, args)
     else:
         console.print("[blue]No duplicate directories found.[/blue]")
 
-    with console.status("[bold green]Phase 2: Searching for duplicate files...") as status:
+    with console.status(
+        "[bold green]Phase 2: Searching for duplicate files..."
+    ) as status:
         file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
 
     if file_duplicates:
         total_duplicates = sum(len(group) - 1 for group in file_duplicates)
-        console.print(Panel(
-            f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
-            f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
-            title="File Scan Complete"
-        ))
-        
+        console.print(
+            Panel(
+                f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
+                f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
+                title="File Scan Complete",
+            )
+        )
+
         # Additional safety checks for file processing
         safe_duplicates = []
         for group in file_duplicates:

From 7613deee3e007c463c593daf02c3d875645182e7 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:53:59 +0000
Subject: [PATCH 35/66] Nov 17, 2024, 10:53 PM

---
 sample-shrinker-python/sample-shrinker.py | 65 +++++++++++++----------
 1 file changed, 38 insertions(+), 27 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 49cd402..08e1a3a 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -12,6 +12,8 @@
 from pathlib import Path
 
 import librosa
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 import numpy as np
 import questionary
@@ -345,33 +347,42 @@ def check_effectively_mono(audio, threshold_dB):
 
 def generate_spectrogram(original_file, new_file, backup_dir):
     """Generate and save spectrograms for the original and new files."""
-    y_old, sr_old = librosa.load(original_file, sr=None)
-    y_new, sr_new = librosa.load(new_file, sr=None)
-
-    # Spectrogram for original file
-    plt.figure(figsize=(10, 4))
-    D_old = librosa.amplitude_to_db(np.abs(librosa.stft(y_old)), ref=np.max)
-    librosa.display.specshow(D_old, sr=sr_old, x_axis="time", y_axis="log")
-    plt.colorbar(format="%+2.0f dB")
-    plt.title(f"Spectrogram of {os.path.basename(original_file)}")
-    old_spectrogram_path = os.path.join(
-        backup_dir, os.path.basename(original_file) + ".old.png"
-    )
-    os.makedirs(backup_dir, exist_ok=True)  # Ensure the directory exists
-    plt.savefig(old_spectrogram_path)
-    plt.close()
-
-    # Spectrogram for new file
-    plt.figure(figsize=(10, 4))
-    D_new = librosa.amplitude_to_db(np.abs(librosa.stft(y_new)), ref=np.max)
-    librosa.display.specshow(D_new, sr=sr_new, x_axis="time", y_axis="log")
-    plt.colorbar(format="%+2.0f dB")
-    plt.title(f"Spectrogram of {os.path.basename(new_file)}")
-    new_spectrogram_path = os.path.join(
-        backup_dir, os.path.basename(new_file) + ".new.png"
-    )
-    plt.savefig(new_spectrogram_path)
-    plt.close()
+    try:
+        y_old, sr_old = librosa.load(original_file, sr=None)
+        y_new, sr_new = librosa.load(new_file, sr=None)
+
+        # Ensure the backup directory exists
+        os.makedirs(backup_dir, exist_ok=True)
+
+        # Generate spectrogram for original file
+        plt.figure(figsize=(10, 4))
+        D_old = librosa.amplitude_to_db(np.abs(librosa.stft(y_old)), ref=np.max)
+        librosa.display.specshow(D_old, sr=sr_old, x_axis="time", y_axis="log")
+        plt.colorbar(format="%+2.0f dB")
+        plt.title(f"Spectrogram of {os.path.basename(original_file)}")
+        old_spectrogram_path = os.path.join(
+            backup_dir, os.path.basename(original_file) + ".old.png"
+        )
+        plt.savefig(old_spectrogram_path)
+        plt.close('all')  # Explicitly close all figures
+
+        # Generate spectrogram for new file
+        plt.figure(figsize=(10, 4))
+        D_new = librosa.amplitude_to_db(np.abs(librosa.stft(y_new)), ref=np.max)
+        librosa.display.specshow(D_new, sr=sr_new, x_axis="time", y_axis="log")
+        plt.colorbar(format="%+2.0f dB")
+        plt.title(f"Spectrogram of {os.path.basename(new_file)}")
+        new_spectrogram_path = os.path.join(
+            backup_dir, os.path.basename(new_file) + ".new.png"
+        )
+        plt.savefig(new_spectrogram_path)
+        plt.close('all')  # Explicitly close all figures
+
+    except Exception as e:
+        console.print(f"[red]Error generating spectrograms: {str(e)}[/red]")
+        if args.verbose:
+            import traceback
+            console.print(traceback.format_exc())
 
 
 def list_files(args, file_list):

From f9809720b1a9614a98582a61ea2f932cc80ade94 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 06:55:47 +0000
Subject: [PATCH 36/66] Nov 17, 2024, 10:55 PM

---
 sample-shrinker-python/sample-shrinker.py | 24 ++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 08e1a3a..bf524fc 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -293,21 +293,27 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
 
                         # Generate spectrograms if enabled
                         if not args.skip_spectrograms:
-                            generate_spectrogram(
-                                file_path, file_path, backup_path.parent
-                            )
+                            try:
+                                generate_spectrogram(
+                                    file_path, 
+                                    file_path, 
+                                    backup_path.parent,
+                                    verbose=args.verbose
+                                )
+                            except Exception as spec_err:
+                                console.print(f"[yellow]Warning: Could not generate spectrograms: {spec_err}[/yellow]")
+                                if args.verbose:
+                                    import traceback
+                                    console.print(traceback.format_exc())
 
                     except Exception as e:
                         console.print(f"[red]Error creating backup: {str(e)}[/red]")
                         if args.verbose:
                             import traceback
-
                             console.print(traceback.format_exc())
                         return
                 else:
-                    console.print(
-                        "[yellow]No backup created (backups disabled)[/yellow]"
-                    )
+                    console.print("[yellow]No backup created (backups disabled)[/yellow]")
 
                 # Export the converted audio file
                 try:
@@ -345,7 +351,7 @@ def check_effectively_mono(audio, threshold_dB):
     return peak_diff_db < threshold_dB
 
 
-def generate_spectrogram(original_file, new_file, backup_dir):
+def generate_spectrogram(original_file, new_file, backup_dir, verbose=False):
     """Generate and save spectrograms for the original and new files."""
     try:
         y_old, sr_old = librosa.load(original_file, sr=None)
@@ -380,7 +386,7 @@ def generate_spectrogram(original_file, new_file, backup_dir):
 
     except Exception as e:
         console.print(f"[red]Error generating spectrograms: {str(e)}[/red]")
-        if args.verbose:
+        if verbose:
             import traceback
             console.print(traceback.format_exc())
 

From f7d765398bb4f1e485328af96d132f8933bb3115 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:01:18 +0000
Subject: [PATCH 37/66] Nov 17, 2024, 11:01 PM

---
 sample-shrinker-python/sample-shrinker.py | 67 ++++++++++++++---------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index bf524fc..f2f051b 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -360,29 +360,41 @@ def generate_spectrogram(original_file, new_file, backup_dir, verbose=False):
         # Ensure the backup directory exists
         os.makedirs(backup_dir, exist_ok=True)
 
+        # Set a reasonable n_fft based on signal length
+        n_fft = min(2048, len(y_old))
+        if n_fft % 2 != 0:  # Ensure n_fft is even
+            n_fft -= 1
+
         # Generate spectrogram for original file
-        plt.figure(figsize=(10, 4))
-        D_old = librosa.amplitude_to_db(np.abs(librosa.stft(y_old)), ref=np.max)
-        librosa.display.specshow(D_old, sr=sr_old, x_axis="time", y_axis="log")
-        plt.colorbar(format="%+2.0f dB")
-        plt.title(f"Spectrogram of {os.path.basename(original_file)}")
-        old_spectrogram_path = os.path.join(
-            backup_dir, os.path.basename(original_file) + ".old.png"
-        )
-        plt.savefig(old_spectrogram_path)
-        plt.close('all')  # Explicitly close all figures
-
-        # Generate spectrogram for new file
-        plt.figure(figsize=(10, 4))
-        D_new = librosa.amplitude_to_db(np.abs(librosa.stft(y_new)), ref=np.max)
-        librosa.display.specshow(D_new, sr=sr_new, x_axis="time", y_axis="log")
-        plt.colorbar(format="%+2.0f dB")
-        plt.title(f"Spectrogram of {os.path.basename(new_file)}")
-        new_spectrogram_path = os.path.join(
-            backup_dir, os.path.basename(new_file) + ".new.png"
-        )
-        plt.savefig(new_spectrogram_path)
-        plt.close('all')  # Explicitly close all figures
+        with plt.ioff():  # Turn off interactive mode
+            fig = plt.figure(figsize=(10, 4))
+            D_old = librosa.amplitude_to_db(
+                np.abs(librosa.stft(y_old, n_fft=n_fft)), 
+                ref=np.max
+            )
+            librosa.display.specshow(D_old, sr=sr_old, x_axis="time", y_axis="log")
+            plt.colorbar(format="%+2.0f dB")
+            plt.title(f"Spectrogram of {os.path.basename(original_file)}")
+            old_spectrogram_path = os.path.join(
+                backup_dir, os.path.basename(original_file) + ".old.png"
+            )
+            plt.savefig(old_spectrogram_path)
+            plt.close(fig)
+
+            # Generate spectrogram for new file
+            fig = plt.figure(figsize=(10, 4))
+            D_new = librosa.amplitude_to_db(
+                np.abs(librosa.stft(y_new, n_fft=n_fft)), 
+                ref=np.max
+            )
+            librosa.display.specshow(D_new, sr=sr_new, x_axis="time", y_axis="log")
+            plt.colorbar(format="%+2.0f dB")
+            plt.title(f"Spectrogram of {os.path.basename(new_file)}")
+            new_spectrogram_path = os.path.join(
+                backup_dir, os.path.basename(new_file) + ".new.png"
+            )
+            plt.savefig(new_spectrogram_path)
+            plt.close(fig)
 
     except Exception as e:
         console.print(f"[red]Error generating spectrograms: {str(e)}[/red]")
@@ -933,10 +945,13 @@ def get_interactive_config():
             default="_backup",
         ).ask()
         if args.backup_dir.strip():  # If not empty
-            args.skip_spectrograms = questionary.confirm(
-                "Generate spectrograms for backup comparison?",
-                default=not args.skip_spectrograms,
-            ).ask()
+            args.backup_dir = args.backup_dir.strip()
+            # Only ask about spectrograms if they weren't explicitly skipped in advanced options
+            if not args.skip_spectrograms:
+                args.skip_spectrograms = not questionary.confirm(
+                    "Generate spectrograms for backup comparison?",
+                    default=False
+                ).ask()
         else:
             args.backup_dir = "-"
             args.skip_spectrograms = True

From ee2b6aecf103658e4a7f38c60b8e78bbd1cd33b0 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:09:01 +0000
Subject: [PATCH 38/66] Nov 17, 2024, 11:09 PM

---
 sample-shrinker-python/sample-shrinker.py | 28 +++++++++--------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index f2f051b..90a2282 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -270,27 +270,23 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                 # Backup handling
                 if args.backup_dir != "-":
                     try:
-                        # Convert the file path to a Path object
+                        # Convert paths to Path objects
                         file_path_obj = Path(file_path).resolve()
-                        # Get the absolute path to the backup directory
-                        backup_dir = Path(args.backup_dir).resolve()
-
-                        # Create the relative path structure
-                        rel_path = file_path_obj.relative_to(file_path_obj.parent)
-                        backup_path = backup_dir / rel_path.parent.name / rel_path.name
-
+                        backup_base = Path(args.backup_dir).resolve()
+                        
+                        # Get the relative path from the current working directory
+                        rel_path = file_path_obj.relative_to(Path.cwd())
+                        
+                        # Create the full backup path maintaining directory structure
+                        backup_path = backup_base / rel_path
+                        
                         # Ensure the backup directory exists
                         backup_path.parent.mkdir(parents=True, exist_ok=True)
-
-                        # Add .old extension for the backup
-                        backup_path = backup_path.with_suffix(
-                            backup_path.suffix + ".old"
-                        )
-
+                        
                         # Copy the original file with metadata preserved
                         console.print(f"[cyan]Backing up to: {backup_path}[/cyan]")
                         shutil.copy2(file_path, backup_path)
-
+                        
                         # Generate spectrograms if enabled
                         if not args.skip_spectrograms:
                             try:
@@ -323,8 +319,6 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                 except Exception as e:
                     console.print(f"[red]Error saving converted file: {str(e)}[/red]")
                     if args.verbose:
-                        import traceback
-
                         console.print(traceback.format_exc())
         else:
             status = Text()

From cdf9948315f5dc56d32888535f7670b028e56db0 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:10:20 +0000
Subject: [PATCH 39/66] Nov 17, 2024, 11:10 PM

---
 sample-shrinker-python/sample-shrinker.py | 40 +++++++++++++++--------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 90a2282..1a82dea 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -145,26 +145,40 @@ def reencode_audio(file_path):
     """Re-encode audio file to PCM 16-bit if it has a different encoding."""
     try:
         output_path = str(Path(file_path).with_suffix(".reencoded.wav"))
-        # Use ffmpeg directly for more reliable conversion
+        # Use ffmpeg with explicit decoding and encoding parameters
         cmd = [
-            "ffmpeg",
-            "-y",
-            "-i",
-            str(file_path),
-            "-acodec",
-            "pcm_s16le",
-            "-ar",
-            "44100",
-            output_path,
+            "ffmpeg", "-y",
+            "-i", str(file_path),
+            "-acodec", "pcm_s16le",  # Force 16-bit PCM encoding
+            "-ar", "44100",          # Maintain sample rate
+            "-ac", "2",              # Maintain stereo if present
+            "-f", "wav",             # Force WAV format
+            output_path
         ]
-
+        
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             console.print(f"[green]Successfully re-encoded: {output_path}[/green]")
             return output_path
         else:
-            console.print(f"[red]FFmpeg error: {result.stderr}[/red]")
-            return None
+            # If first attempt fails, try with different decoder
+            cmd = [
+                "ffmpeg", "-y",
+                "-c:a", "adpcm_ms",  # Explicitly specify ADPCM decoder
+                "-i", str(file_path),
+                "-acodec", "pcm_s16le",
+                "-ar", "44100",
+                "-ac", "2",
+                "-f", "wav",
+                output_path
+            ]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+            if result.returncode == 0:
+                console.print(f"[green]Successfully re-encoded with ADPCM decoder: {output_path}[/green]")
+                return output_path
+            else:
+                console.print(f"[red]FFmpeg error: {result.stderr}[/red]")
+                return None
     except Exception as e:
         console.print(f"[red]Error re-encoding {file_path}: {str(e)}[/red]")
         return None

From 93f6b91300b022354dcd4ca41243f0c83aac4494 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:22:33 +0000
Subject: [PATCH 40/66] Nov 17, 2024, 11:22 PM

---
 sample-shrinker-python/sample-shrinker.py | 32 ++++++++++-------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 1a82dea..f69e02f 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -206,22 +206,19 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
         else:
             console.print(f"Processing file: [cyan]{file_path}[/cyan]")
 
+        # First check if file needs processing
         try:
             audio = AudioSegment.from_file(file_path)
-        except (IndexError, OSError) as e:
-            console.print(f"[red]Error loading {file_path}: {str(e)}[/red]")
-            console.print("[yellow]Attempting to re-encode file...[/yellow]")
-            reencoded_file = reencode_audio(file_path)
-            if reencoded_file:
-                try:
-                    audio = AudioSegment.from_file(reencoded_file)
-                except Exception as re_err:
-                    console.print(
-                        f"[red]Failed to process re-encoded file: {str(re_err)}[/red]"
-                    )
-                    return
-            else:
+            # Skip if file already meets our requirements
+            if (audio.sample_width * 8 <= args.bitdepth and 
+                audio.channels <= args.channels and 
+                audio.frame_rate <= args.samplerate and
+                (not args.min_samplerate or audio.frame_rate >= args.min_samplerate)):
+                console.print(f"[blue]Skipping {file_path} (already meets requirements)[/blue]")
                 return
+        except Exception as e:
+            console.print(f"[yellow]Error checking file {file_path}: {str(e)}[/yellow]")
+            # Continue with processing if we can't check the file
 
         modified = False
         change_reason = []
@@ -288,11 +285,10 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                         file_path_obj = Path(file_path).resolve()
                         backup_base = Path(args.backup_dir).resolve()
                         
-                        # Get the relative path from the current working directory
-                        rel_path = file_path_obj.relative_to(Path.cwd())
-                        
-                        # Create the full backup path maintaining directory structure
-                        backup_path = backup_base / rel_path
+                        # Get the relative structure from the file path
+                        # Use the last few components of the path to maintain structure
+                        path_parts = file_path_obj.parts[-3:]  # Adjust number as needed
+                        backup_path = backup_base.joinpath(*path_parts)
                         
                         # Ensure the backup directory exists
                         backup_path.parent.mkdir(parents=True, exist_ok=True)

From 86e31c1102fad3059a2c91879bb47426a67de467 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:24:03 +0000
Subject: [PATCH 41/66] Nov 17, 2024, 11:24 PM

---
 sample-shrinker-python/sample-shrinker.py | 157 +++++++++++++++-------
 1 file changed, 108 insertions(+), 49 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index f69e02f..4b9cfae 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -13,7 +13,8 @@
 
 import librosa
 import matplotlib
-matplotlib.use('Agg')  # Use non-interactive backend
+
+matplotlib.use("Agg")  # Use non-interactive backend
 import matplotlib.pyplot as plt
 import numpy as np
 import questionary
@@ -147,15 +148,21 @@ def reencode_audio(file_path):
         output_path = str(Path(file_path).with_suffix(".reencoded.wav"))
         # Use ffmpeg with explicit decoding and encoding parameters
         cmd = [
-            "ffmpeg", "-y",
-            "-i", str(file_path),
-            "-acodec", "pcm_s16le",  # Force 16-bit PCM encoding
-            "-ar", "44100",          # Maintain sample rate
-            "-ac", "2",              # Maintain stereo if present
-            "-f", "wav",             # Force WAV format
-            output_path
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(file_path),
+            "-acodec",
+            "pcm_s16le",  # Force 16-bit PCM encoding
+            "-ar",
+            "44100",  # Maintain sample rate
+            "-ac",
+            "2",  # Maintain stereo if present
+            "-f",
+            "wav",  # Force WAV format
+            output_path,
         ]
-        
+
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             console.print(f"[green]Successfully re-encoded: {output_path}[/green]")
@@ -163,18 +170,27 @@ def reencode_audio(file_path):
         else:
             # If first attempt fails, try with different decoder
             cmd = [
-                "ffmpeg", "-y",
-                "-c:a", "adpcm_ms",  # Explicitly specify ADPCM decoder
-                "-i", str(file_path),
-                "-acodec", "pcm_s16le",
-                "-ar", "44100",
-                "-ac", "2",
-                "-f", "wav",
-                output_path
+                "ffmpeg",
+                "-y",
+                "-c:a",
+                "adpcm_ms",  # Explicitly specify ADPCM decoder
+                "-i",
+                str(file_path),
+                "-acodec",
+                "pcm_s16le",
+                "-ar",
+                "44100",
+                "-ac",
+                "2",
+                "-f",
+                "wav",
+                output_path,
             ]
             result = subprocess.run(cmd, capture_output=True, text=True)
             if result.returncode == 0:
-                console.print(f"[green]Successfully re-encoded with ADPCM decoder: {output_path}[/green]")
+                console.print(
+                    f"[green]Successfully re-encoded with ADPCM decoder: {output_path}[/green]"
+                )
                 return output_path
             else:
                 console.print(f"[red]FFmpeg error: {result.stderr}[/red]")
@@ -198,6 +214,51 @@ def check_ffmpeg():
         return False
 
 
+def get_audio_properties(file_path):
+    """Get audio file properties using pydub."""
+    try:
+        audio = AudioSegment.from_file(file_path)
+        return {
+            "bit_depth": audio.sample_width * 8,
+            "channels": audio.channels,
+            "sample_rate": audio.frame_rate,
+            "duration": len(audio),
+        }
+    except Exception as e:
+        console.print(
+            f"[yellow]Error reading audio properties from {file_path}: {str(e)}[/yellow]"
+        )
+        return None
+
+
+def needs_conversion(file_path, args):
+    """Check if file needs conversion based on its properties."""
+    props = get_audio_properties(file_path)
+    if not props:
+        return True  # If we can't read properties, attempt conversion
+
+    needs_conversion = False
+    reasons = []
+
+    if props["bit_depth"] > args.bitdepth:
+        needs_conversion = True
+        reasons.append(f"bit depth {props['bit_depth']} -> {args.bitdepth}")
+
+    if props["channels"] > args.channels:
+        needs_conversion = True
+        reasons.append(f"channels {props['channels']} -> {args.channels}")
+
+    if props["sample_rate"] > args.samplerate:
+        needs_conversion = True
+        reasons.append(f"sample rate {props['sample_rate']} -> {args.samplerate}")
+
+    if args.min_samplerate and props["sample_rate"] < args.min_samplerate:
+        needs_conversion = True
+        reasons.append(f"sample rate {props['sample_rate']} -> {args.min_samplerate}")
+
+    return needs_conversion, reasons
+
+
 def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
     """Main function to process audio files based on arguments."""
     try:
@@ -206,19 +267,13 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
         else:
             console.print(f"Processing file: [cyan]{file_path}[/cyan]")
 
-        # First check if file needs processing
-        try:
-            audio = AudioSegment.from_file(file_path)
-            # Skip if file already meets our requirements
-            if (audio.sample_width * 8 <= args.bitdepth and 
-                audio.channels <= args.channels and 
-                audio.frame_rate <= args.samplerate and
-                (not args.min_samplerate or audio.frame_rate >= args.min_samplerate)):
-                console.print(f"[blue]Skipping {file_path} (already meets requirements)[/blue]")
-                return
-        except Exception as e:
-            console.print(f"[yellow]Error checking file {file_path}: {str(e)}[/yellow]")
-            # Continue with processing if we can't check the file
+        # Check if file needs processing
+        needs_conv, reasons = needs_conversion(file_path, args)
+        if not needs_conv:
+            console.print(
+                f"[blue]Skipping {file_path} (already meets requirements)[/blue]"
+            )
+            return
 
         modified = False
         change_reason = []
@@ -284,42 +339,48 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                         # Convert paths to Path objects
                         file_path_obj = Path(file_path).resolve()
                         backup_base = Path(args.backup_dir).resolve()
-                        
+
                         # Get the relative structure from the file path
                         # Use the last few components of the path to maintain structure
                         path_parts = file_path_obj.parts[-3:]  # Adjust number as needed
                         backup_path = backup_base.joinpath(*path_parts)
-                        
+
                         # Ensure the backup directory exists
                         backup_path.parent.mkdir(parents=True, exist_ok=True)
-                        
+
                         # Copy the original file with metadata preserved
                         console.print(f"[cyan]Backing up to: {backup_path}[/cyan]")
                         shutil.copy2(file_path, backup_path)
-                        
+
                         # Generate spectrograms if enabled
                         if not args.skip_spectrograms:
                             try:
                                 generate_spectrogram(
-                                    file_path, 
-                                    file_path, 
+                                    file_path,
+                                    file_path,
                                     backup_path.parent,
-                                    verbose=args.verbose
+                                    verbose=args.verbose,
                                 )
                             except Exception as spec_err:
-                                console.print(f"[yellow]Warning: Could not generate spectrograms: {spec_err}[/yellow]")
+                                console.print(
+                                    f"[yellow]Warning: Could not generate spectrograms: {spec_err}[/yellow]"
+                                )
                                 if args.verbose:
                                     import traceback
+
                                     console.print(traceback.format_exc())
 
                     except Exception as e:
                         console.print(f"[red]Error creating backup: {str(e)}[/red]")
                         if args.verbose:
                             import traceback
+
                             console.print(traceback.format_exc())
                         return
                 else:
-                    console.print("[yellow]No backup created (backups disabled)[/yellow]")
+                    console.print(
+                        "[yellow]No backup created (backups disabled)[/yellow]"
+                    )
 
                 # Export the converted audio file
                 try:
@@ -373,8 +434,7 @@ def generate_spectrogram(original_file, new_file, backup_dir, verbose=False):
         with plt.ioff():  # Turn off interactive mode
             fig = plt.figure(figsize=(10, 4))
             D_old = librosa.amplitude_to_db(
-                np.abs(librosa.stft(y_old, n_fft=n_fft)), 
-                ref=np.max
+                np.abs(librosa.stft(y_old, n_fft=n_fft)), ref=np.max
             )
             librosa.display.specshow(D_old, sr=sr_old, x_axis="time", y_axis="log")
             plt.colorbar(format="%+2.0f dB")
@@ -388,8 +448,7 @@ def generate_spectrogram(original_file, new_file, backup_dir, verbose=False):
             # Generate spectrogram for new file
             fig = plt.figure(figsize=(10, 4))
             D_new = librosa.amplitude_to_db(
-                np.abs(librosa.stft(y_new, n_fft=n_fft)), 
-                ref=np.max
+                np.abs(librosa.stft(y_new, n_fft=n_fft)), ref=np.max
             )
             librosa.display.specshow(D_new, sr=sr_new, x_axis="time", y_axis="log")
             plt.colorbar(format="%+2.0f dB")
@@ -404,6 +463,7 @@ def generate_spectrogram(original_file, new_file, backup_dir, verbose=False):
         console.print(f"[red]Error generating spectrograms: {str(e)}[/red]")
         if verbose:
             import traceback
+
             console.print(traceback.format_exc())
 
 
@@ -840,7 +900,7 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=["Preview changes (dry run)"],
+            default=("Preview changes (dry run)",),
         ).ask()
 
         args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
@@ -871,7 +931,7 @@ def get_interactive_config():
                     "Compare sample rates",
                     "Compare channel counts",
                 ],
-                default=["Compare file lengths", "Compare sample rates"],
+                default=("Compare file lengths", "Compare sample rates"),
             ).ask()
 
         # Get backup options (moved before backup_choice)
@@ -953,8 +1013,7 @@ def get_interactive_config():
             # Only ask about spectrograms if they weren't explicitly skipped in advanced options
             if not args.skip_spectrograms:
                 args.skip_spectrograms = not questionary.confirm(
-                    "Generate spectrograms for backup comparison?",
-                    default=False
+                    "Generate spectrograms for backup comparison?", default=False
                 ).ask()
         else:
             args.backup_dir = "-"
@@ -1010,7 +1069,7 @@ def process_duplicates(args):
 
         if args.dry_run:
             console.print("[yellow]DRY RUN - No directories will be moved[/yellow]")
-        process_duplicate_directories(verified_duplicates, args)
+        process_duplicate_directories(dir_duplicates, args)
     else:
         console.print("[blue]No duplicate directories found.[/blue]")
 

From d798be81ad7c0c1483601f72d8ec20c76a290a74 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:25:58 +0000
Subject: [PATCH 42/66] Nov 17, 2024, 11:25 PM

---
 sample-shrinker-python/sample-shrinker.py | 34 +++++++++++++++++------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 4b9cfae..e4bc25a 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -235,28 +235,28 @@ def needs_conversion(file_path, args):
     """Check if file needs conversion based on its properties."""
     props = get_audio_properties(file_path)
     if not props:
-        return True  # If we can't read properties, attempt conversion
+        return (True, ["unable to read properties"])  # Return tuple with reason
 
-    needs_conversion = False
+    needs_conv = False
     reasons = []
 
     if props["bit_depth"] > args.bitdepth:
-        needs_conversion = True
+        needs_conv = True
         reasons.append(f"bit depth {props['bit_depth']} -> {args.bitdepth}")
 
     if props["channels"] > args.channels:
-        needs_conversion = True
+        needs_conv = True
         reasons.append(f"channels {props['channels']} -> {args.channels}")
 
     if props["sample_rate"] > args.samplerate:
-        needs_conversion = True
+        needs_conv = True
         reasons.append(f"sample rate {props['sample_rate']} -> {args.samplerate}")
 
     if args.min_samplerate and props["sample_rate"] < args.min_samplerate:
-        needs_conversion = True
+        needs_conv = True
         reasons.append(f"sample rate {props['sample_rate']} -> {args.min_samplerate}")
 
-    return needs_conversion, reasons
+    return (needs_conv, reasons)  # Always return a tuple
 
 
 def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
@@ -267,6 +267,24 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
         else:
             console.print(f"Processing file: [cyan]{file_path}[/cyan]")
 
+        # Load the audio file first
+        try:
+            audio = AudioSegment.from_file(file_path)
+        except Exception as e:
+            console.print(f"[yellow]Error loading {file_path}: {str(e)}[/yellow]")
+            console.print("[yellow]Attempting to re-encode file...[/yellow]")
+            reencoded_file = reencode_audio(file_path)
+            if reencoded_file:
+                try:
+                    audio = AudioSegment.from_file(reencoded_file)
+                except Exception as re_err:
+                    console.print(
+                        f"[red]Failed to process re-encoded file: {str(re_err)}[/red]"
+                    )
+                    return
+            else:
+                return
+
         # Check if file needs processing
         needs_conv, reasons = needs_conversion(file_path, args)
         if not needs_conv:
@@ -276,7 +294,7 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
             return
 
         modified = False
-        change_reason = []
+        change_reason = reasons.copy()  # Use the reasons from needs_conversion
 
         # Check if we need to convert the channels
         if audio.channels > args.channels:

From c49b22399fb759e2cc6eafbc0aaf69de536aa863 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:28:25 +0000
Subject: [PATCH 43/66] Nov 17, 2024, 11:28 PM

---
 sample-shrinker-python/sample-shrinker.py | 107 +++++++++++++++-------
 1 file changed, 74 insertions(+), 33 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index e4bc25a..d95e5ce 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -146,20 +146,47 @@ def reencode_audio(file_path):
     """Re-encode audio file to PCM 16-bit if it has a different encoding."""
     try:
         output_path = str(Path(file_path).with_suffix(".reencoded.wav"))
-        # Use ffmpeg with explicit decoding and encoding parameters
+
+        # First try with ADPCM decoder explicitly
         cmd = [
             "ffmpeg",
             "-y",
+            "-c:a",
+            "adpcm_ms",  # Try ADPCM first
             "-i",
             str(file_path),
             "-acodec",
-            "pcm_s16le",  # Force 16-bit PCM encoding
+            "pcm_s16le",
             "-ar",
-            "44100",  # Maintain sample rate
+            "44100",
             "-ac",
-            "2",  # Maintain stereo if present
+            "2",
             "-f",
-            "wav",  # Force WAV format
+            "wav",
+            output_path,
+        ]
+
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            console.print(
+                f"[green]Successfully re-encoded with ADPCM decoder: {output_path}[/green]"
+            )
+            return output_path
+
+        # If ADPCM fails, try with default decoder
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(file_path),
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "44100",
+            "-ac",
+            "2",
+            "-f",
+            "wav",
             output_path,
         ]
 
@@ -167,34 +194,38 @@ def reencode_audio(file_path):
         if result.returncode == 0:
             console.print(f"[green]Successfully re-encoded: {output_path}[/green]")
             return output_path
+
+        # If both attempts fail, try with more aggressive options
+        cmd = [
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(file_path),
+            "-acodec",
+            "pcm_s16le",
+            "-ar",
+            "44100",
+            "-ac",
+            "2",
+            "-af",
+            "aresample=resampler=soxr",  # Use high quality resampler
+            "-strict",
+            "experimental",
+            "-f",
+            "wav",
+            output_path,
+        ]
+
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            console.print(
+                f"[green]Successfully re-encoded with resampling: {output_path}[/green]"
+            )
+            return output_path
         else:
-            # If first attempt fails, try with different decoder
-            cmd = [
-                "ffmpeg",
-                "-y",
-                "-c:a",
-                "adpcm_ms",  # Explicitly specify ADPCM decoder
-                "-i",
-                str(file_path),
-                "-acodec",
-                "pcm_s16le",
-                "-ar",
-                "44100",
-                "-ac",
-                "2",
-                "-f",
-                "wav",
-                output_path,
-            ]
-            result = subprocess.run(cmd, capture_output=True, text=True)
-            if result.returncode == 0:
-                console.print(
-                    f"[green]Successfully re-encoded with ADPCM decoder: {output_path}[/green]"
-                )
-                return output_path
-            else:
-                console.print(f"[red]FFmpeg error: {result.stderr}[/red]")
-                return None
+            console.print(f"[red]FFmpeg error: {result.stderr}[/red]")
+            return None
+
     except Exception as e:
         console.print(f"[red]Error re-encoding {file_path}: {str(e)}[/red]")
         return None
@@ -217,7 +248,17 @@ def check_ffmpeg():
 def get_audio_properties(file_path):
     """Get audio file properties using pydub."""
     try:
-        audio = AudioSegment.from_file(file_path)
+        # First try direct loading
+        try:
+            audio = AudioSegment.from_file(file_path)
+        except Exception as e:
+            # If direct loading fails, try re-encoding first
+            reencoded = reencode_audio(file_path)
+            if reencoded:
+                audio = AudioSegment.from_file(reencoded)
+            else:
+                raise e
+
         return {
             "bit_depth": audio.sample_width * 8,
             "channels": audio.channels,

From a6cda0e0e6363af83debd0302eefa124a9b5d0d0 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:30:28 +0000
Subject: [PATCH 44/66] Nov 17, 2024, 11:30 PM

---
 sample-shrinker-python/sample-shrinker.py | 86 ++++++++++++++---------
 1 file changed, 54 insertions(+), 32 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index d95e5ce..9d55818 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -251,20 +251,37 @@ def get_audio_properties(file_path):
         # First try direct loading
         try:
             audio = AudioSegment.from_file(file_path)
+            # Fix for incorrect bit depth detection
+            actual_bit_depth = audio.sample_width * 8
+            # Some 24-bit files might be reported as 32-bit
+            if actual_bit_depth == 32:
+                # Check if it's actually 24-bit
+                max_value = max(
+                    abs(min(audio.get_array_of_samples())),
+                    abs(max(audio.get_array_of_samples())),
+                )
+                if max_value <= 0x7FFFFF:  # Max value for 24-bit
+                    actual_bit_depth = 24
+
+            return {
+                "bit_depth": actual_bit_depth,
+                "channels": audio.channels,
+                "sample_rate": audio.frame_rate,
+                "duration": len(audio),
+            }
         except Exception as e:
             # If direct loading fails, try re-encoding first
             reencoded = reencode_audio(file_path)
             if reencoded:
                 audio = AudioSegment.from_file(reencoded)
+                return {
+                    "bit_depth": audio.sample_width * 8,
+                    "channels": audio.channels,
+                    "sample_rate": audio.frame_rate,
+                    "duration": len(audio),
+                }
             else:
                 raise e
-
-        return {
-            "bit_depth": audio.sample_width * 8,
-            "channels": audio.channels,
-            "sample_rate": audio.frame_rate,
-            "duration": len(audio),
-        }
     except Exception as e:
         console.print(
             f"[yellow]Error reading audio properties from {file_path}: {str(e)}[/yellow]"
@@ -400,34 +417,39 @@ def process_audio(file_path, args, dry_run=False, task_id=None, progress=None):
                         backup_base = Path(args.backup_dir).resolve()
 
                         # Get the relative structure from the file path
-                        # Use the last few components of the path to maintain structure
                         path_parts = file_path_obj.parts[-3:]  # Adjust number as needed
                         backup_path = backup_base.joinpath(*path_parts)
 
-                        # Ensure the backup directory exists
-                        backup_path.parent.mkdir(parents=True, exist_ok=True)
-
-                        # Copy the original file with metadata preserved
-                        console.print(f"[cyan]Backing up to: {backup_path}[/cyan]")
-                        shutil.copy2(file_path, backup_path)
-
-                        # Generate spectrograms if enabled
-                        if not args.skip_spectrograms:
-                            try:
-                                generate_spectrogram(
-                                    file_path,
-                                    file_path,
-                                    backup_path.parent,
-                                    verbose=args.verbose,
-                                )
-                            except Exception as spec_err:
-                                console.print(
-                                    f"[yellow]Warning: Could not generate spectrograms: {spec_err}[/yellow]"
-                                )
-                                if args.verbose:
-                                    import traceback
-
-                                    console.print(traceback.format_exc())
+                        # Check if backup already exists
+                        if backup_path.exists():
+                            console.print(
+                                f"[blue]Backup already exists: {backup_path}[/blue]"
+                            )
+                        else:
+                            # Ensure the backup directory exists
+                            backup_path.parent.mkdir(parents=True, exist_ok=True)
+
+                            # Copy the original file with metadata preserved
+                            console.print(f"[cyan]Backing up to: {backup_path}[/cyan]")
+                            shutil.copy2(file_path, backup_path)
+
+                            # Generate spectrograms if enabled
+                            if not args.skip_spectrograms:
+                                try:
+                                    generate_spectrogram(
+                                        file_path,
+                                        file_path,
+                                        backup_path.parent,
+                                        verbose=args.verbose,
+                                    )
+                                except Exception as spec_err:
+                                    console.print(
+                                        f"[yellow]Warning: Could not generate spectrograms: {spec_err}[/yellow]"
+                                    )
+                                    if args.verbose:
+                                        import traceback
+
+                                        console.print(traceback.format_exc())
 
                     except Exception as e:
                         console.print(f"[red]Error creating backup: {str(e)}[/red]")

From 9e941a5a32f5efaf5114dd1b980aac9da34de5e8 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:30:56 +0000
Subject: [PATCH 45/66] Nov 17, 2024, 11:30 PM

---
 sample-shrinker-python/sample-shrinker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 9d55818..85ac7f6 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -981,7 +981,7 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=("Preview changes (dry run)",),
+            default=["Preview changes (dry run)"],
         ).ask()
 
         args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
@@ -1012,7 +1012,7 @@ def get_interactive_config():
                     "Compare sample rates",
                     "Compare channel counts",
                 ],
-                default=("Compare file lengths", "Compare sample rates"),
+                default=["Compare file lengths", "Compare sample rates"],
             ).ask()
 
         # Get backup options (moved before backup_choice)

From d341d2a845d0b9c9b636045ce0eff1b9ec55840a Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:31:45 +0000
Subject: [PATCH 46/66] Nov 17, 2024, 11:31 PM

---
 sample-shrinker-python/sample-shrinker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 85ac7f6..d4a221d 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -981,7 +981,7 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=["Preview changes (dry run)"],
+            default=[2],  # Index of "Preview changes (dry run)"
         ).ask()
 
         args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
@@ -1012,7 +1012,7 @@ def get_interactive_config():
                     "Compare sample rates",
                     "Compare channel counts",
                 ],
-                default=["Compare file lengths", "Compare sample rates"],
+                default=[0, 1],  # Indices of the first two choices
             ).ask()
 
         # Get backup options (moved before backup_choice)

From 1d46e7e542be31622bdf53aee4b4a96427a6cd9e Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:33:10 +0000
Subject: [PATCH 47/66] Nov 17, 2024, 11:33 PM

---
 sample-shrinker-python/sample-shrinker.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index d4a221d..d7138e9 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -981,7 +981,7 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=[2],  # Index of "Preview changes (dry run)"
+            default=["Preview changes (dry run)"],  # Use the actual choice string
         ).ask()
 
         args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
@@ -1012,7 +1012,10 @@ def get_interactive_config():
                     "Compare sample rates",
                     "Compare channel counts",
                 ],
-                default=[0, 1],  # Indices of the first two choices
+                default=[
+                    "Compare file lengths",
+                    "Compare sample rates",
+                ],  # Use actual choice strings
             ).ask()
 
         # Get backup options (moved before backup_choice)

From 9891ac330c0b3c60fa6deb55271757bc3b097951 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:34:27 +0000
Subject: [PATCH 48/66] Nov 17, 2024, 11:34 PM

---
 sample-shrinker-python/sample-shrinker.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index d7138e9..2ee77c0 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -981,7 +981,6 @@ def get_interactive_config():
                 "Preview changes (dry run)",
                 "Show detailed progress",
             ],
-            default=["Preview changes (dry run)"],  # Use the actual choice string
         ).ask()
 
         args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
@@ -1012,10 +1011,6 @@ def get_interactive_config():
                     "Compare sample rates",
                     "Compare channel counts",
                 ],
-                default=[
-                    "Compare file lengths",
-                    "Compare sample rates",
-                ],  # Use actual choice strings
             ).ask()
 
         # Get backup options (moved before backup_choice)

From 1f6b1890d4799de9f786d15c262f9882cb4be6ae Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:34:41 +0000
Subject: [PATCH 49/66] Nov 17, 2024, 11:34 PM

---
 sample-shrinker-python/sample-shrinker.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 2ee77c0..03be39f 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1013,11 +1013,10 @@ def get_interactive_config():
                 ],
             ).ask()
 
-        # Get backup options (moved before backup_choice)
+        # Get backup options (modified text prompt)
         backup_dir = questionary.text(
-            "Backup directory path:",
+            "Backup directory path (where duplicates will be moved):",
             default="_backup",
-            description="Directory where duplicates will be moved",
         ).ask()
 
         if backup_dir.strip():  # If not empty

From e48843521221f5ad243169ab1cac885829b48cbb Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:37:31 +0000
Subject: [PATCH 50/66] Nov 17, 2024, 11:37 PM

---
 sample-shrinker-python/sample-shrinker.py | 223 +++++++++++++++-------
 1 file changed, 155 insertions(+), 68 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 03be39f..ff9b3e5 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -980,6 +980,7 @@ def get_interactive_config():
                 "Ignore filenames (match by content only)",
                 "Preview changes (dry run)",
                 "Show detailed progress",
+                "Process files in parallel",
             ],
         ).ask()
 
@@ -990,28 +991,16 @@ def get_interactive_config():
         args.dry_run = "Preview changes (dry run)" in duplicate_options
         args.verbose = "Show detailed progress" in duplicate_options
 
-        if args.use_fuzzy:
-            # Get fuzzy matching configuration
-            args.fuzzy_threshold = questionary.select(
-                "Select fuzzy matching threshold (higher = more strict):",
-                choices=[
-                    "95 - Nearly identical",
-                    "90 - Very similar",
-                    "85 - Similar",
-                    "80 - Somewhat similar",
-                ],
-                default="90 - Very similar",
-            ).ask()
-            args.fuzzy_threshold = int(args.fuzzy_threshold.split()[0])
-
-            args.fuzzy_options = questionary.checkbox(
-                "Select fuzzy matching options:",
-                choices=[
-                    "Compare file lengths",
-                    "Compare sample rates",
-                    "Compare channel counts",
-                ],
+        # Add parallel processing configuration
+        if "Process files in parallel" in duplicate_options:
+            args.jobs = questionary.select(
+                "How many parallel jobs?",
+                choices=["2", "4", "8", "16", "24", "32"],
+                default="4",
             ).ask()
+            args.jobs = int(args.jobs)
+        else:
+            args.jobs = 1
 
         # Get backup options (modified text prompt)
         backup_dir = questionary.text(
@@ -1131,68 +1120,166 @@ def get_interactive_config():
 
 def process_duplicates(args):
     """Process both directory and file level duplicates with visual feedback."""
-    with console.status(
-        "[bold green]Phase 1: Searching for duplicate directories..."
-    ) as status:
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console,
+    ) as progress:
+        # Phase 1: Directory scan
+        scan_task = progress.add_task(
+            "[cyan]Scanning for duplicate directories...", total=None
+        )
         dir_duplicates = find_duplicate_directories(args.files)
+        progress.update(scan_task, completed=True)
 
-    if dir_duplicates:
-        count = sum(len(v) - 1 for v in dir_duplicates.values())
-        console.print(
-            Panel(
-                f"Found [cyan]{count}[/cyan] duplicate directories",
-                title="Directory Scan Complete",
+        if dir_duplicates:
+            count = sum(len(v) - 1 for v in dir_duplicates.values())
+            console.print(
+                Panel(
+                    f"Found [cyan]{count}[/cyan] duplicate directories",
+                    title="Directory Scan Complete",
+                )
             )
-        )
 
-        if args.dry_run:
-            console.print("[yellow]DRY RUN - No directories will be moved[/yellow]")
-        process_duplicate_directories(dir_duplicates, args)
-    else:
-        console.print("[blue]No duplicate directories found.[/blue]")
+            if args.dry_run:
+                console.print("[yellow]DRY RUN - No directories will be moved[/yellow]")
+
+            # Process directories with progress bar
+            dir_task = progress.add_task(
+                "[green]Processing directories...", total=len(dir_duplicates)
+            )
+
+            with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                futures = []
+                for (dir_name, file_count, total_size), paths in dir_duplicates.items():
+                    future = executor.submit(
+                        process_directory_group,
+                        dir_name,
+                        file_count,
+                        total_size,
+                        paths,
+                        args,
+                        progress,
+                    )
+                    futures.append(future)
 
-    with console.status(
-        "[bold green]Phase 2: Searching for duplicate files..."
-    ) as status:
+                for future in as_completed(futures):
+                    try:
+                        future.result()
+                        progress.advance(dir_task)
+                    except Exception as e:
+                        console.print(f"[red]Error processing directory: {e}[/red]")
+        else:
+            console.print("[blue]No duplicate directories found.[/blue]")
+
+        # Phase 2: File scan
+        file_task = progress.add_task(
+            "[cyan]Scanning for duplicate files...", total=None
+        )
         file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
+        progress.update(file_task, completed=True)
 
-    if file_duplicates:
-        total_duplicates = sum(len(group) - 1 for group in file_duplicates)
-        console.print(
-            Panel(
-                f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
-                f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
-                title="File Scan Complete",
+        if file_duplicates:
+            total_duplicates = sum(len(group) - 1 for group in file_duplicates)
+            console.print(
+                Panel(
+                    f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
+                    f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
+                    title="File Scan Complete",
+                )
             )
+
+            if args.dry_run:
+                console.print("[yellow]DRY RUN - No files will be moved[/yellow]")
+
+            # Process files with progress bar
+            file_process_task = progress.add_task(
+                "[green]Processing files...", total=len(file_duplicates)
+            )
+
+            with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                futures = []
+                for group in file_duplicates:
+                    future = executor.submit(
+                        process_file_group,
+                        group,
+                        fuzzy_groups,
+                        args,
+                        progress,
+                    )
+                    futures.append(future)
+
+                for future in as_completed(futures):
+                    try:
+                        future.result()
+                        progress.advance(file_process_task)
+                    except Exception as e:
+                        console.print(f"[red]Error processing file group: {e}[/red]")
+
+    console.print("[green]Duplicate removal complete![/green]")
+
+
+def process_directory_group(dir_name, file_count, total_size, paths, args, progress):
+    """Process a group of duplicate directories."""
+    try:
+        console.print(
+            f"\nFound duplicate directories named '[cyan]{dir_name}[/cyan]' "
+            f"with {file_count} files ({total_size} bytes):"
         )
 
-        # Additional safety checks for file processing
-        safe_duplicates = []
-        for group in file_duplicates:
-            # Verify files are not symbolic links
-            real_files = [f for f in group if not f.is_symlink()]
+        # Sort paths by creation time
+        valid_paths = []
+        for path in paths:
+            try:
+                stat = path.stat()
+                valid_paths.append((path, stat.st_ctime))
+            except FileNotFoundError:
+                console.print(f"[yellow]Warning: Directory not found: {path}[/yellow]")
+                continue
+
+        if not valid_paths:
+            console.print("[red]No valid paths found in group[/red]")
+            return
+
+        valid_paths.sort(key=lambda x: x[1])
+
+        # Keep the oldest directory
+        original_dir = valid_paths[0][0]
+        console.print(
+            f"Keeping oldest copy: [green]{original_dir}[/green] "
+            f"(created: {time.ctime(valid_paths[0][1])})"
+        )
 
-            # Check if files are in use (on Windows) or locked
-            available_files = []
-            for file in real_files:
+        # Process newer copies
+        for dir_path, ctime in valid_paths[1:]:
+            console.print(
+                f"Moving duplicate: [yellow]{dir_path}[/yellow] "
+                f"(created: {time.ctime(ctime)})"
+            )
+            if not args.dry_run:
                 try:
-                    with open(file, "rb") as f:
-                        # Try to get a shared lock
-                        pass
-                    available_files.append(file)
-                except (IOError, OSError):
-                    print(f"Warning: File {file} appears to be in use, skipping")
+                    # Create backup path
+                    rel_path = dir_path.relative_to(dir_path.parent.parent)
+                    backup_path = Path(args.backup_dir) / rel_path
 
-            if len(available_files) > 1:
-                safe_duplicates.append(available_files)
+                    # Ensure backup directory exists
+                    backup_path.parent.mkdir(parents=True, exist_ok=True)
 
-        if args.dry_run:
-            console.print("[yellow]DRY RUN - No files will be moved[/yellow]")
-        process_duplicate_files(safe_duplicates, fuzzy_groups, args)
-    else:
-        console.print("[blue]No duplicate files found.[/blue]")
+                    shutil.move(str(dir_path), str(backup_path))
+                except Exception as e:
+                    console.print(f"[red]Error moving directory {dir_path}: {e}[/red]")
 
-    console.print("[green]Duplicate removal complete![/green]")
+    except Exception as e:
+        console.print(f"[red]Error processing directory group {dir_name}: {e}[/red]")
+        raise
+
+
+def process_file_group(group, fuzzy_groups, args, progress):
+    """Process a group of duplicate files."""
+    # Similar structure to process_duplicate_files but adapted for parallel processing
+    # ... implement the file processing logic here ...
 
 
 def main():

From 3affe52bf0b9e7edd9268c1fe105093ad14f1316 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:38:50 +0000
Subject: [PATCH 51/66] Nov 17, 2024, 11:38 PM

---
 sample-shrinker-python/sample-shrinker.py | 75 ++++++++++++++++++++++-
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index ff9b3e5..c5559ad 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1278,8 +1278,79 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
 
 def process_file_group(group, fuzzy_groups, args, progress):
     """Process a group of duplicate files."""
-    # Similar structure to process_duplicate_files but adapted for parallel processing
-    # ... implement the file processing logic here ...
+    try:
+        # Get file size for reporting
+        file_size = group[0].stat().st_size
+        console.print(
+            f"\nProcessing duplicate group for '[cyan]{group[0].name}[/cyan]' ({file_size} bytes)"
+        )
+
+        # For fuzzy matches, show similarity percentages
+        if group in fuzzy_groups:
+            base_fingerprint = get_audio_fingerprint(group[0])
+            console.print("[cyan]Similarity scores:[/cyan]")
+            for file in group[1:]:
+                file_fingerprint = get_audio_fingerprint(file)
+                similarity = compare_audio_similarity(
+                    base_fingerprint, file_fingerprint
+                )
+                console.print(
+                    f"  {file.name}: [yellow]{similarity:.1f}%[/yellow] similar"
+                )
+
+        # Sort files by creation time
+        files_with_time = []
+        for file_path in group:
+            try:
+                stat = file_path.stat()
+                files_with_time.append((file_path, stat.st_ctime))
+            except FileNotFoundError:
+                console.print(f"[yellow]Warning: File not found: {file_path}[/yellow]")
+                continue
+
+        if not files_with_time:
+            console.print("[red]No valid files found in group[/red]")
+            return
+
+        files_with_time.sort(key=lambda x: x[1])
+
+        # Keep the oldest file
+        original_file = files_with_time[0][0]
+        console.print(
+            f"Keeping oldest copy: [green]{original_file}[/green] "
+            f"(created: {time.ctime(files_with_time[0][1])})"
+        )
+
+        # Process newer copies
+        for file_path, ctime in files_with_time[1:]:
+            console.print(
+                f"Processing duplicate: [yellow]{file_path}[/yellow] "
+                f"(created: {time.ctime(ctime)})"
+            )
+
+            if not args.dry_run:
+                try:
+                    if args.delete_duplicates:
+                        console.print(f"[red]Deleting: {file_path}[/red]")
+                        file_path.unlink()
+                    else:
+                        # Create backup path maintaining directory structure
+                        rel_path = file_path.relative_to(file_path.parent.parent)
+                        backup_path = Path(args.backup_dir) / rel_path
+
+                        # Ensure backup directory exists
+                        backup_path.parent.mkdir(parents=True, exist_ok=True)
+
+                        # Move the file
+                        console.print(f"Moving to: [blue]{backup_path}[/blue]")
+                        shutil.move(str(file_path), str(backup_path))
+
+                except Exception as e:
+                    console.print(f"[red]Error processing file {file_path}: {e}[/red]")
+
+    except Exception as e:
+        console.print(f"[red]Error processing file group: {e}[/red]")
+        raise
 
 
 def main():

From 74c8d5fd767806c2ffcb09be320c2105b18db487 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:41:32 +0000
Subject: [PATCH 52/66] Nov 17, 2024, 11:41 PM

---
 sample-shrinker-python/sample-shrinker.py | 73 +++++++++++++++++------
 1 file changed, 54 insertions(+), 19 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index c5559ad..135be02 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -995,7 +995,7 @@ def get_interactive_config():
         if "Process files in parallel" in duplicate_options:
             args.jobs = questionary.select(
                 "How many parallel jobs?",
-                choices=["2", "4", "8", "16", "24", "32"],
+                choices=["2", "4", "8", "16", "24", "32", "48", "64"],
                 default="4",
             ).ask()
             args.jobs = int(args.jobs)
@@ -1129,7 +1129,7 @@ def process_duplicates(args):
     ) as progress:
         # Phase 1: Directory scan
         scan_task = progress.add_task(
-            "[cyan]Scanning for duplicate directories...", total=None
+            "[magenta]Scanning for duplicate directories...[/magenta]", total=None
         )
         dir_duplicates = find_duplicate_directories(args.files)
         progress.update(scan_task, completed=True)
@@ -1176,7 +1176,7 @@ def process_duplicates(args):
 
         # Phase 2: File scan
         file_task = progress.add_task(
-            "[cyan]Scanning for duplicate files...", total=None
+            "[magenta]Scanning for duplicate files...[/magenta]", total=None
         )
         file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
         progress.update(file_task, completed=True)
@@ -1233,10 +1233,17 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
         valid_paths = []
         for path in paths:
             try:
+                if not path.exists():
+                    console.print(
+                        f"[yellow]Warning: Directory not found: {path}[/yellow]"
+                    )
+                    continue
                 stat = path.stat()
                 valid_paths.append((path, stat.st_ctime))
-            except FileNotFoundError:
-                console.print(f"[yellow]Warning: Directory not found: {path}[/yellow]")
+            except (FileNotFoundError, OSError) as e:
+                console.print(
+                    f"[yellow]Warning: Cannot access directory {path}: {e}[/yellow]"
+                )
                 continue
 
         if not valid_paths:
@@ -1254,22 +1261,50 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
 
         # Process newer copies
         for dir_path, ctime in valid_paths[1:]:
-            console.print(
-                f"Moving duplicate: [yellow]{dir_path}[/yellow] "
-                f"(created: {time.ctime(ctime)})"
-            )
-            if not args.dry_run:
-                try:
-                    # Create backup path
-                    rel_path = dir_path.relative_to(dir_path.parent.parent)
-                    backup_path = Path(args.backup_dir) / rel_path
+            try:
+                if not dir_path.exists():
+                    console.print(
+                        f"[yellow]Warning: Directory disappeared: {dir_path}[/yellow]"
+                    )
+                    continue
 
-                    # Ensure backup directory exists
-                    backup_path.parent.mkdir(parents=True, exist_ok=True)
+                console.print(
+                    f"Moving duplicate: [yellow]{dir_path}[/yellow] "
+                    f"(created: {time.ctime(ctime)})"
+                )
 
-                    shutil.move(str(dir_path), str(backup_path))
-                except Exception as e:
-                    console.print(f"[red]Error moving directory {dir_path}: {e}[/red]")
+                if not args.dry_run:
+                    try:
+                        # Create backup path
+                        rel_path = dir_path.relative_to(dir_path.parent.parent)
+                        backup_path = Path(args.backup_dir) / rel_path
+
+                        # Ensure backup directory exists
+                        backup_path.parent.mkdir(parents=True, exist_ok=True)
+
+                        if backup_path.exists():
+                            console.print(
+                                f"[yellow]Warning: Backup path already exists: {backup_path}[/yellow]"
+                            )
+                            # Create a unique name by appending a number
+                            counter = 1
+                            while backup_path.exists():
+                                new_name = f"{backup_path.name}_{counter}"
+                                backup_path = backup_path.parent / new_name
+                                counter += 1
+                            console.print(
+                                f"[blue]Using alternate path: {backup_path}[/blue]"
+                            )
+
+                        shutil.move(str(dir_path), str(backup_path))
+                    except Exception as e:
+                        console.print(
+                            f"[red]Error moving directory {dir_path}: {e}[/red]"
+                        )
+
+            except Exception as e:
+                console.print(f"[red]Error processing directory {dir_path}: {e}[/red]")
+                continue
 
     except Exception as e:
         console.print(f"[red]Error processing directory group {dir_name}: {e}[/red]")

From 47b08d2ed446191978e19813aa1ebfeb21635d98 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:43:59 +0000
Subject: [PATCH 53/66] Nov 17, 2024, 11:43 PM

---
 sample-shrinker-python/sample-shrinker.py | 95 +++++++----------------
 1 file changed, 29 insertions(+), 66 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 135be02..d2964a0 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -970,6 +970,7 @@ def get_interactive_config():
     args.pre_normalize = False
     args.list = False
     args.jobs = 1
+    args.fuzzy_threshold = 90  # Add default fuzzy threshold
 
     if action == "Remove duplicate directories":
         # For duplicate removal, get configuration options
@@ -1026,6 +1027,20 @@ def get_interactive_config():
         args.delete_duplicates = "Delete" in backup_choice
         args.dry_run = "Preview" in backup_choice
 
+        if args.use_fuzzy:
+            # Get fuzzy matching configuration
+            threshold_choice = questionary.select(
+                "Select fuzzy matching threshold (higher = more strict):",
+                choices=[
+                    "95 - Nearly identical",
+                    "90 - Very similar",
+                    "85 - Similar",
+                    "80 - Somewhat similar",
+                ],
+                default="90 - Very similar",
+            ).ask()
+            args.fuzzy_threshold = int(threshold_choice.split()[0])
+
         return "duplicates", args
 
     # For sample shrinking, get all the conversion options
@@ -1127,9 +1142,11 @@ def process_duplicates(args):
         TaskProgressColumn(),
         console=console,
     ) as progress:
-        # Phase 1: Directory scan
+        # Phase 1: Directory scan - Compare directory contents
+        console.print("\n[cyan]Phase 1: Directory Structure Analysis[/cyan]")
         scan_task = progress.add_task(
-            "[magenta]Scanning for duplicate directories...[/magenta]", total=None
+            "[magenta]Scanning for duplicate directory structures...[/magenta]",
+            total=None,
         )
         dir_duplicates = find_duplicate_directories(args.files)
         progress.update(scan_task, completed=True)
@@ -1138,45 +1155,17 @@ def process_duplicates(args):
             count = sum(len(v) - 1 for v in dir_duplicates.values())
             console.print(
                 Panel(
-                    f"Found [cyan]{count}[/cyan] duplicate directories",
-                    title="Directory Scan Complete",
+                    f"Found [cyan]{count}[/cyan] directories with identical contents",
+                    title="Directory Structure Analysis Complete",
                 )
             )
+            # ... rest of directory processing ...
 
-            if args.dry_run:
-                console.print("[yellow]DRY RUN - No directories will be moved[/yellow]")
-
-            # Process directories with progress bar
-            dir_task = progress.add_task(
-                "[green]Processing directories...", total=len(dir_duplicates)
-            )
-
-            with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-                futures = []
-                for (dir_name, file_count, total_size), paths in dir_duplicates.items():
-                    future = executor.submit(
-                        process_directory_group,
-                        dir_name,
-                        file_count,
-                        total_size,
-                        paths,
-                        args,
-                        progress,
-                    )
-                    futures.append(future)
-
-                for future in as_completed(futures):
-                    try:
-                        future.result()
-                        progress.advance(dir_task)
-                    except Exception as e:
-                        console.print(f"[red]Error processing directory: {e}[/red]")
-        else:
-            console.print("[blue]No duplicate directories found.[/blue]")
-
-        # Phase 2: File scan
+        # Phase 2: File scan - Compare individual files
+        console.print("\n[cyan]Phase 2: Individual File Analysis[/cyan]")
         file_task = progress.add_task(
-            "[magenta]Scanning for duplicate files...[/magenta]", total=None
+            "[magenta]Scanning for duplicate files across all directories...[/magenta]",
+            total=None,
         )
         file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
         progress.update(file_task, completed=True)
@@ -1187,38 +1176,12 @@ def process_duplicates(args):
                 Panel(
                     f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
                     f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
-                    title="File Scan Complete",
+                    title="File Analysis Complete",
                 )
             )
+            # ... rest of file processing ...
 
-            if args.dry_run:
-                console.print("[yellow]DRY RUN - No files will be moved[/yellow]")
-
-            # Process files with progress bar
-            file_process_task = progress.add_task(
-                "[green]Processing files...", total=len(file_duplicates)
-            )
-
-            with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-                futures = []
-                for group in file_duplicates:
-                    future = executor.submit(
-                        process_file_group,
-                        group,
-                        fuzzy_groups,
-                        args,
-                        progress,
-                    )
-                    futures.append(future)
-
-                for future in as_completed(futures):
-                    try:
-                        future.result()
-                        progress.advance(file_process_task)
-                    except Exception as e:
-                        console.print(f"[red]Error processing file group: {e}[/red]")
-
-    console.print("[green]Duplicate removal complete![/green]")
+    console.print("[green]Duplicate analysis and removal complete![/green]")
 
 
 def process_directory_group(dir_name, file_count, total_size, paths, args, progress):

From d9f46342dbf5aa5c39ef21b805207e6a2398102a Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:46:11 +0000
Subject: [PATCH 54/66] Nov 17, 2024, 11:46 PM

---
 sample-shrinker-python/sample-shrinker.py | 102 ++++++++++++++++++----
 1 file changed, 87 insertions(+), 15 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index d2964a0..9ae9c00 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -692,8 +692,14 @@ def get_audio_fingerprint(file_path):
         samples = samples / np.max(np.abs(samples))
 
         # Get a signature using peaks in frequency domain
+        # Adjust nperseg and noverlap based on sample length
+        nperseg = min(1024, len(samples))
+        if nperseg % 2 != 0:  # Make sure nperseg is even
+            nperseg -= 1
+        noverlap = nperseg // 2  # Set noverlap to half of nperseg
+
         freqs, times, spectrogram = scipy.signal.spectrogram(
-            samples, audio.frame_rate, nperseg=1024, noverlap=512
+            samples, audio.frame_rate, nperseg=nperseg, noverlap=noverlap
         )
 
         # Get the strongest frequencies
@@ -703,7 +709,9 @@ def get_audio_fingerprint(file_path):
 
         return peaks
     except Exception as e:
-        print(f"Error generating audio fingerprint for {file_path}: {e}")
+        console.print(
+            f"[yellow]Error generating audio fingerprint for {file_path}: {e}[/yellow]"
+        )
         return None
 
 
@@ -1135,6 +1143,8 @@ def get_interactive_config():
 
 def process_duplicates(args):
     """Process both directory and file level duplicates with visual feedback."""
+    # Phase 1: Directory scan - Compare directory contents
+    console.print("\n[cyan]Phase 1: Directory Structure Analysis[/cyan]")
     with Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
@@ -1142,8 +1152,6 @@ def process_duplicates(args):
         TaskProgressColumn(),
         console=console,
     ) as progress:
-        # Phase 1: Directory scan - Compare directory contents
-        console.print("\n[cyan]Phase 1: Directory Structure Analysis[/cyan]")
         scan_task = progress.add_task(
             "[magenta]Scanning for duplicate directory structures...[/magenta]",
             total=None,
@@ -1151,18 +1159,59 @@ def process_duplicates(args):
         dir_duplicates = find_duplicate_directories(args.files)
         progress.update(scan_task, completed=True)
 
-        if dir_duplicates:
-            count = sum(len(v) - 1 for v in dir_duplicates.values())
-            console.print(
-                Panel(
-                    f"Found [cyan]{count}[/cyan] directories with identical contents",
-                    title="Directory Structure Analysis Complete",
-                )
+    if dir_duplicates:
+        count = sum(len(v) - 1 for v in dir_duplicates.values())
+        console.print(
+            Panel(
+                f"Found [cyan]{count}[/cyan] directories with identical contents",
+                title="Directory Structure Analysis Complete",
             )
-            # ... rest of directory processing ...
+        )
+        if not args.dry_run:
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                console=console,
+            ) as progress:
+                dir_task = progress.add_task(
+                    "[green]Processing directories...", total=len(dir_duplicates)
+                )
+                with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                    futures = []
+                    for (
+                        dir_name,
+                        file_count,
+                        total_size,
+                    ), paths in dir_duplicates.items():
+                        future = executor.submit(
+                            process_directory_group,
+                            dir_name,
+                            file_count,
+                            total_size,
+                            paths,
+                            args,
+                            progress,
+                        )
+                        futures.append(future)
+
+                    for future in as_completed(futures):
+                        try:
+                            future.result()
+                            progress.advance(dir_task)
+                        except Exception as e:
+                            console.print(f"[red]Error processing directory: {e}[/red]")
 
-        # Phase 2: File scan - Compare individual files
-        console.print("\n[cyan]Phase 2: Individual File Analysis[/cyan]")
+    # Phase 2: File scan - Compare individual files
+    console.print("\n[cyan]Phase 2: Individual File Analysis[/cyan]")
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console,
+    ) as progress:
         file_task = progress.add_task(
             "[magenta]Scanning for duplicate files across all directories...[/magenta]",
             total=None,
@@ -1179,7 +1228,30 @@ def process_duplicates(args):
                     title="File Analysis Complete",
                 )
             )
-            # ... rest of file processing ...
+            if not args.dry_run:
+                file_process_task = progress.add_task(
+                    "[green]Processing files...", total=len(file_duplicates)
+                )
+                with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                    futures = []
+                    for group in file_duplicates:
+                        future = executor.submit(
+                            process_file_group,
+                            group,
+                            fuzzy_groups,
+                            args,
+                            progress,
+                        )
+                        futures.append(future)
+
+                    for future in as_completed(futures):
+                        try:
+                            future.result()
+                            progress.advance(file_process_task)
+                        except Exception as e:
+                            console.print(
+                                f"[red]Error processing file group: {e}[/red]"
+                            )
 
     console.print("[green]Duplicate analysis and removal complete![/green]")
 

From c752c64b879d5cbffab4c25e8456e0ea9dee54ba Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:47:46 +0000
Subject: [PATCH 55/66] Nov 17, 2024, 11:47 PM

---
 sample-shrinker-python/sample-shrinker.py | 100 +++++++++++++++-------
 1 file changed, 67 insertions(+), 33 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 9ae9c00..55e90c7 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -732,10 +732,10 @@ def compare_audio_similarity(file1_fingerprint, file2_fingerprint):
     return similarity if not np.isnan(similarity) else 0
 
 
-def find_duplicate_files(paths, args):
+def find_duplicate_files(paths, args, progress, task_id):
     """Find duplicate files using a multi-stage approach with audio fingerprinting."""
-    print("Scanning for duplicate files...")
     size_groups = defaultdict(list)
+    scanned = 0
 
     # First pass: group by size
     for path in paths:
@@ -743,8 +743,12 @@ def find_duplicate_files(paths, args):
         if path.is_dir():
             for file_path in path.rglob("*"):
                 if file_path.is_file() and is_audio_file(str(file_path)):
+                    # Update progress
+                    scanned += 1
+                    progress.update(task_id, completed=scanned)
+
                     if args.verbose:
-                        print(f"Scanning: {file_path}")
+                        console.print(f"Scanning: {file_path}")
                     size = file_path.stat().st_size
                     size_groups[size].append(file_path)
 
@@ -864,15 +868,20 @@ def process_duplicate_files(duplicates, fuzzy_groups, args):
                     print(f"Error moving file {file_path}: {e}")
 
 
-def find_duplicate_directories(paths):
+def find_duplicate_directories(paths, progress, task_id):
     """Find directories with matching names and file counts."""
     dir_map = defaultdict(list)
+    scanned = 0
 
     for path in paths:
         path = Path(path)
         if path.is_dir():
             for dir_path in path.rglob("*"):
                 if dir_path.is_dir():
+                    # Update progress
+                    scanned += 1
+                    progress.update(task_id, completed=scanned)
+
                     # Get directory name, file count, and total size
                     dir_name = dir_path.name.lower()  # Case-insensitive comparison
                     files = list(dir_path.glob("*"))
@@ -1150,14 +1159,21 @@ def process_duplicates(args):
         TextColumn("[progress.description]{task.description}"),
         BarColumn(),
         TaskProgressColumn(),
+        TextColumn("{task.completed}/{task.total} directories"),
         console=console,
     ) as progress:
+        # First count total directories for progress
+        total_dirs = sum(
+            1 for path in args.files for _ in path.rglob("*") if path.is_dir()
+        )
         scan_task = progress.add_task(
             "[magenta]Scanning for duplicate directory structures...[/magenta]",
-            total=None,
+            total=total_dirs,
         )
-        dir_duplicates = find_duplicate_directories(args.files)
-        progress.update(scan_task, completed=True)
+
+        # Modify find_duplicate_directories to update progress
+        dir_duplicates = find_duplicate_directories(args.files, progress, scan_task)
+        progress.update(scan_task, completed=total_dirs)
 
     if dir_duplicates:
         count = sum(len(v) - 1 for v in dir_duplicates.values())
@@ -1173,10 +1189,11 @@ def process_duplicates(args):
                 TextColumn("[progress.description]{task.description}"),
                 BarColumn(),
                 TaskProgressColumn(),
+                TextColumn("{task.completed}/{task.total} duplicates"),
                 console=console,
             ) as progress:
                 dir_task = progress.add_task(
-                    "[green]Processing directories...", total=len(dir_duplicates)
+                    "[green]Processing directories...", total=count
                 )
                 with ThreadPoolExecutor(max_workers=args.jobs) as executor:
                     futures = []
@@ -1210,14 +1227,23 @@ def process_duplicates(args):
         TextColumn("[progress.description]{task.description}"),
         BarColumn(),
         TaskProgressColumn(),
+        TextColumn("{task.completed}/{task.total} files"),
         console=console,
     ) as progress:
+        # First count total files for progress
+        total_files = sum(
+            1 for path in args.files for _ in path.rglob("*") if path.is_file()
+        )
         file_task = progress.add_task(
             "[magenta]Scanning for duplicate files across all directories...[/magenta]",
-            total=None,
+            total=total_files,
+        )
+
+        # Modify find_duplicate_files to update progress
+        file_duplicates, fuzzy_groups = find_duplicate_files(
+            args.files, args, progress, file_task
         )
-        file_duplicates, fuzzy_groups = find_duplicate_files(args.files, args)
-        progress.update(file_task, completed=True)
+        progress.update(file_task, completed=total_files)
 
         if file_duplicates:
             total_duplicates = sum(len(group) - 1 for group in file_duplicates)
@@ -1229,29 +1255,37 @@ def process_duplicates(args):
                 )
             )
             if not args.dry_run:
-                file_process_task = progress.add_task(
-                    "[green]Processing files...", total=len(file_duplicates)
-                )
-                with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-                    futures = []
-                    for group in file_duplicates:
-                        future = executor.submit(
-                            process_file_group,
-                            group,
-                            fuzzy_groups,
-                            args,
-                            progress,
-                        )
-                        futures.append(future)
-
-                    for future in as_completed(futures):
-                        try:
-                            future.result()
-                            progress.advance(file_process_task)
-                        except Exception as e:
-                            console.print(
-                                f"[red]Error processing file group: {e}[/red]"
+                with Progress(
+                    SpinnerColumn(),
+                    TextColumn("[progress.description]{task.description}"),
+                    BarColumn(),
+                    TaskProgressColumn(),
+                    TextColumn("{task.completed}/{task.total} duplicates"),
+                    console=console,
+                ) as progress:
+                    file_process_task = progress.add_task(
+                        "[green]Processing files...", total=total_duplicates
+                    )
+                    with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                        futures = []
+                        for group in file_duplicates:
+                            future = executor.submit(
+                                process_file_group,
+                                group,
+                                fuzzy_groups,
+                                args,
+                                progress,
                             )
+                            futures.append(future)
+
+                        for future in as_completed(futures):
+                            try:
+                                future.result()
+                                progress.advance(file_process_task)
+                            except Exception as e:
+                                console.print(
+                                    f"[red]Error processing file group: {e}[/red]"
+                                )
 
     console.print("[green]Duplicate analysis and removal complete![/green]")
 

From 8ea5496e23e1fe7ca0e49f03a383d4fb691a2ba0 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:48:17 +0000
Subject: [PATCH 56/66] Nov 17, 2024, 11:48 PM

---
 sample-shrinker-python/sample-shrinker.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 55e90c7..5daed49 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -873,8 +873,8 @@ def find_duplicate_directories(paths, progress, task_id):
     dir_map = defaultdict(list)
     scanned = 0
 
-    for path in paths:
-        path = Path(path)
+    for path_str in paths:
+        path = Path(path_str)  # Convert string to Path
         if path.is_dir():
             for dir_path in path.rglob("*"):
                 if dir_path.is_dir():
@@ -1164,7 +1164,10 @@ def process_duplicates(args):
     ) as progress:
         # First count total directories for progress
         total_dirs = sum(
-            1 for path in args.files for _ in path.rglob("*") if path.is_dir()
+            1
+            for path_str in args.files
+            for _ in Path(path_str).rglob("*")
+            if Path(path_str).is_dir()
         )
         scan_task = progress.add_task(
             "[magenta]Scanning for duplicate directory structures...[/magenta]",

From 9e4e14db937ca0626a17edeca707f6bdda5d4133 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:50:12 +0000
Subject: [PATCH 57/66] Nov 17, 2024, 11:50 PM

---
 sample-shrinker-python/sample-shrinker.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 5daed49..24494cc 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1235,7 +1235,10 @@ def process_duplicates(args):
     ) as progress:
         # First count total files for progress
         total_files = sum(
-            1 for path in args.files for _ in path.rglob("*") if path.is_file()
+            1
+            for path_str in args.files
+            for _ in Path(path_str).rglob("*")
+            if Path(_).is_file()  # Check if the found item is a file
         )
         file_task = progress.add_task(
             "[magenta]Scanning for duplicate files across all directories...[/magenta]",

From 781723bc0ab77331cbc8e1c1155a93d094f03b37 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 07:57:18 +0000
Subject: [PATCH 58/66] Nov 17, 2024, 11:57 PM

---
 sample-shrinker-python/sample-shrinker.py | 187 ++++++++++++++++------
 1 file changed, 141 insertions(+), 46 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 24494cc..2f1f42e 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1225,6 +1225,8 @@ def process_duplicates(args):
 
     # Phase 2: File scan - Compare individual files
     console.print("\n[cyan]Phase 2: Individual File Analysis[/cyan]")
+
+    # Step 1: Initial file scanning
     with Progress(
         SpinnerColumn(),
         TextColumn("[progress.description]{task.description}"),
@@ -1233,65 +1235,158 @@ def process_duplicates(args):
         TextColumn("{task.completed}/{task.total} files"),
         console=console,
     ) as progress:
-        # First count total files for progress
         total_files = sum(
             1
             for path_str in args.files
             for _ in Path(path_str).rglob("*")
-            if Path(_).is_file()  # Check if the found item is a file
+            if Path(_).is_file()
         )
-        file_task = progress.add_task(
-            "[magenta]Scanning for duplicate files across all directories...[/magenta]",
+        scan_task = progress.add_task(
+            "[magenta]Scanning filesystem for files...[/magenta]",
             total=total_files,
         )
 
-        # Modify find_duplicate_files to update progress
-        file_duplicates, fuzzy_groups = find_duplicate_files(
-            args.files, args, progress, file_task
+        # First pass: collect files and group by size
+        size_groups = defaultdict(list)
+        scanned = 0
+        for path_str in args.files:
+            path = Path(path_str)
+            if path.is_dir():
+                for file_path in path.rglob("*"):
+                    if file_path.is_file() and is_audio_file(str(file_path)):
+                        scanned += 1
+                        progress.update(scan_task, completed=scanned)
+                        size_groups[file_path.stat().st_size].append(file_path)
+
+    # Step 2: Similarity analysis
+    potential_duplicates = {
+        size: files for size, files in size_groups.items() if len(files) > 1
+    }
+    total_to_check = sum(len(files) for files in potential_duplicates.values())
+
+    file_duplicates = []
+    fuzzy_groups = []
+
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        TextColumn("{task.completed}/{task.total} files"),
+        console=console,
+    ) as progress:
+        check_task = progress.add_task(
+            "[magenta]Analyzing files for duplicates...[/magenta]",
+            total=total_to_check,
         )
-        progress.update(file_task, completed=total_files)
 
-        if file_duplicates:
-            total_duplicates = sum(len(group) - 1 for group in file_duplicates)
-            console.print(
-                Panel(
-                    f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
-                    f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
-                    title="File Analysis Complete",
+        checked = 0
+        for size, file_paths in potential_duplicates.items():
+            if args.verbose:
+                console.print(
+                    f"\nChecking {len(file_paths)} files of size {size} bytes..."
                 )
+
+            # Group files by hash first
+            hash_groups = defaultdict(list)
+            for file_path in file_paths:
+                try:
+                    file_hash = get_file_hash(file_path, fuzzy=False)
+                    if args.ignore_names:
+                        hash_groups[file_hash].append(file_path)
+                    else:
+                        name_key = file_path.stem.lower()
+                        hash_groups[(name_key, file_hash)].append(file_path)
+                    checked += 1
+                    progress.update(check_task, completed=checked)
+                except Exception as e:
+                    console.print(f"[red]Error hashing file {file_path}: {e}[/red]")
+
+            # Add exact matches to results
+            for group in hash_groups.values():
+                if len(group) > 1:
+                    file_duplicates.append(group)
+
+            # Check for similar audio content if enabled
+            if args.use_fuzzy:
+                # Get unmatched files (not in any exact match group)
+                unmatched = [
+                    f for f in file_paths if not any(f in g for g in file_duplicates)
+                ]
+
+                if len(unmatched) > 1:
+                    fingerprints = {}
+                    for file_path in unmatched:
+                        fingerprint = get_audio_fingerprint(file_path)
+                        if fingerprint is not None:
+                            fingerprints[file_path] = fingerprint
+
+                    # Compare fingerprints
+                    processed = set()
+                    for file1 in fingerprints:
+                        if file1 in processed:
+                            continue
+
+                        similar_files = [file1]
+                        for file2 in fingerprints:
+                            if file2 != file1 and file2 not in processed:
+                                similarity = compare_audio_similarity(
+                                    fingerprints[file1], fingerprints[file2]
+                                )
+                                if similarity >= args.fuzzy_threshold:
+                                    similar_files.append(file2)
+                                    processed.add(file2)
+
+                        if len(similar_files) > 1:
+                            fuzzy_groups.append(similar_files)
+                            file_duplicates.append(similar_files)
+                            processed.add(file1)
+
+    # Report results and process duplicates
+    if file_duplicates:
+        total_duplicates = sum(len(group) - 1 for group in file_duplicates)
+        console.print(
+            Panel(
+                f"Found [cyan]{total_duplicates}[/cyan] duplicate files\n"
+                f"Including [cyan]{len(fuzzy_groups)}[/cyan] groups of similar files",
+                title="File Analysis Complete",
             )
-            if not args.dry_run:
-                with Progress(
-                    SpinnerColumn(),
-                    TextColumn("[progress.description]{task.description}"),
-                    BarColumn(),
-                    TaskProgressColumn(),
-                    TextColumn("{task.completed}/{task.total} duplicates"),
-                    console=console,
-                ) as progress:
-                    file_process_task = progress.add_task(
-                        "[green]Processing files...", total=total_duplicates
-                    )
-                    with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-                        futures = []
-                        for group in file_duplicates:
-                            future = executor.submit(
-                                process_file_group,
-                                group,
-                                fuzzy_groups,
-                                args,
-                                progress,
+        )
+
+        # Step 3: Process duplicates if not in dry run mode
+        if not args.dry_run:
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                TextColumn("{task.completed}/{task.total} duplicates"),
+                console=console,
+            ) as progress:
+                process_task = progress.add_task(
+                    "[green]Processing duplicate files...", total=total_duplicates
+                )
+
+                with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+                    futures = []
+                    for group in file_duplicates:
+                        future = executor.submit(
+                            process_file_group,
+                            group,
+                            fuzzy_groups,
+                            args,
+                            progress,
+                        )
+                        futures.append(future)
+
+                    for future in as_completed(futures):
+                        try:
+                            future.result()
+                            progress.advance(process_task)
+                        except Exception as e:
+                            console.print(
+                                f"[red]Error processing file group: {e}[/red]"
                             )
-                            futures.append(future)
-
-                        for future in as_completed(futures):
-                            try:
-                                future.result()
-                                progress.advance(file_process_task)
-                            except Exception as e:
-                                console.print(
-                                    f"[red]Error processing file group: {e}[/red]"
-                                )
 
     console.print("[green]Duplicate analysis and removal complete![/green]")
 

From a2966c78a1f77fca29dbc8b5f9281761a418f937 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:15:01 +0000
Subject: [PATCH 59/66] Nov 18, 2024, 12:15 AM

---
 sample-shrinker-python/sample-shrinker.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 2f1f42e..ee04b5f 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1403,13 +1403,21 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
         valid_paths = []
         for path in paths:
             try:
-                if not path.exists():
+                # Wait briefly for cloud storage to download if needed
+                retries = 3
+                while retries > 0:
+                    if path.exists():
+                        stat = path.stat()
+                        valid_paths.append((path, stat.st_ctime))
+                        break
+                    retries -= 1
+                    if retries > 0:
+                        time.sleep(1)  # Wait a second before retry
+
+                if retries == 0:
                     console.print(
-                        f"[yellow]Warning: Directory not found: {path}[/yellow]"
+                        f"[yellow]Warning: Directory not available after retries: {path}[/yellow]"
                     )
-                    continue
-                stat = path.stat()
-                valid_paths.append((path, stat.st_ctime))
             except (FileNotFoundError, OSError) as e:
                 console.print(
                     f"[yellow]Warning: Cannot access directory {path}: {e}[/yellow]"
@@ -1432,9 +1440,10 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
         # Process newer copies
         for dir_path, ctime in valid_paths[1:]:
             try:
+                # Check again before processing as cloud storage might have changed
                 if not dir_path.exists():
                     console.print(
-                        f"[yellow]Warning: Directory disappeared: {dir_path}[/yellow]"
+                        f"[yellow]Skipping unavailable directory: {dir_path}[/yellow]"
                     )
                     continue
 

From 7703ac3318468e2a40ad17b78be4372052ef3a3e Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:16:59 +0000
Subject: [PATCH 60/66] Nov 18, 2024, 12:16 AM

---
 sample-shrinker-python/sample-shrinker.py | 32 ++++++++++++++++++++---
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index ee04b5f..5d59135 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1440,7 +1440,7 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
         # Process newer copies
         for dir_path, ctime in valid_paths[1:]:
             try:
-                # Check again before processing as cloud storage might have changed
+                # First verify source exists
                 if not dir_path.exists():
                     console.print(
                         f"[yellow]Skipping unavailable directory: {dir_path}[/yellow]"
@@ -1458,9 +1458,17 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
                         rel_path = dir_path.relative_to(dir_path.parent.parent)
                         backup_path = Path(args.backup_dir) / rel_path
 
-                        # Ensure backup directory exists
+                        # IMPORTANT: Create ALL parent directories first
                         backup_path.parent.mkdir(parents=True, exist_ok=True)
 
+                        # Verify the backup path is valid before attempting move
+                        if not backup_path.parent.exists():
+                            console.print(
+                                f"[red]Error: Backup directory could not be created: {backup_path.parent}[/red]"
+                            )
+                            continue
+
+                        # Check if destination already exists
                         if backup_path.exists():
                             console.print(
                                 f"[yellow]Warning: Backup path already exists: {backup_path}[/yellow]"
@@ -1475,10 +1483,26 @@ def process_directory_group(dir_name, file_count, total_size, paths, args, progr
                                 f"[blue]Using alternate path: {backup_path}[/blue]"
                             )
 
-                        shutil.move(str(dir_path), str(backup_path))
+                        # Do the move
+                        try:
+                            shutil.move(str(dir_path), str(backup_path))
+                        except Exception as move_error:
+                            console.print(
+                                f"[red]Error moving {dir_path} to {backup_path}: {move_error}[/red]"
+                            )
+                            # Try to provide more context about the error
+                            if not dir_path.exists():
+                                console.print(
+                                    "[red]Source directory no longer exists[/red]"
+                                )
+                            if not backup_path.parent.exists():
+                                console.print(
+                                    "[red]Destination directory does not exist[/red]"
+                                )
+
                     except Exception as e:
                         console.print(
-                            f"[red]Error moving directory {dir_path}: {e}[/red]"
+                            f"[red]Error setting up backup path for {dir_path}: {e}[/red]"
                         )
 
             except Exception as e:

From 7feb4a25598f0a24c908dc0b49e9493a72d529e6 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:18:59 +0000
Subject: [PATCH 61/66] Nov 18, 2024, 12:18 AM

---
 sample-shrinker-python/sample-shrinker.py | 222 +++++++++++++++-------
 1 file changed, 149 insertions(+), 73 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 5d59135..12fc519 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -930,13 +930,13 @@ def process_duplicate_directories(duplicates, args):
 
 def get_interactive_config():
     """Get configuration through interactive questionary prompts."""
-
     # First, get the action type
     action = questionary.select(
         "What would you like to do?",
         choices=[
             "Shrink samples (convert audio files)",
             "Remove duplicate directories",
+            "Restore from backup",
             "Exit",
         ],
     ).ask()
@@ -971,46 +971,38 @@ def get_interactive_config():
     args = argparse.Namespace()
     args.files = paths
 
-    # Set ALL default values (matching parse_args defaults)
-    args.backup_dir = "_backup"
-    args.dry_run = False
-    args.verbose = False
-    args.ext = "wav,mp3"
-    args.bitdepth = 16
-    args.min_bitdepth = None
-    args.channels = 2
-    args.samplerate = 44100
-    args.min_samplerate = None
-    args.auto_mono = False
-    args.auto_mono_threshold = -95.5
-    args.skip_spectrograms = False
-    args.pre_normalize = False
-    args.list = False
-    args.jobs = 1
-    args.fuzzy_threshold = 90  # Add default fuzzy threshold
-
-    if action == "Remove duplicate directories":
-        # For duplicate removal, get configuration options
-        duplicate_options = questionary.checkbox(
-            "Select duplicate removal options:",
+    if action == "Restore from backup":
+        # Get backup directory
+        args.backup_dir = questionary.path(
+            "Select backup directory to restore from:",
+            only_directories=True,
+            default="_backup",
+        ).ask()
+
+        # Get file extensions to restore
+        args.restore_ext = questionary.text(
+            "Enter file extensions to restore (comma-separated, e.g., wav,mp3):",
+            default="wav,mp3",
+        ).ask()
+
+        # Get restore options
+        restore_options = questionary.checkbox(
+            "Select restore options:",
             choices=[
-                "Use fuzzy matching for similar files",
-                "Ignore filenames (match by content only)",
                 "Preview changes (dry run)",
                 "Show detailed progress",
                 "Process files in parallel",
+                "Skip existing files",
+                "Overwrite existing files",
             ],
         ).ask()
 
-        args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
-        args.ignore_names = (
-            "Ignore filenames (match by content only)" in duplicate_options
-        )
-        args.dry_run = "Preview changes (dry run)" in duplicate_options
-        args.verbose = "Show detailed progress" in duplicate_options
+        args.dry_run = "Preview changes (dry run)" in restore_options
+        args.verbose = "Show detailed progress" in restore_options
+        args.skip_existing = "Skip existing files" in restore_options
+        args.overwrite = "Overwrite existing files" in restore_options
 
-        # Add parallel processing configuration
-        if "Process files in parallel" in duplicate_options:
+        if "Process files in parallel" in restore_options:
             args.jobs = questionary.select(
                 "How many parallel jobs?",
                 choices=["2", "4", "8", "16", "24", "32", "48", "64"],
@@ -1020,45 +1012,7 @@ def get_interactive_config():
         else:
             args.jobs = 1
 
-        # Get backup options (modified text prompt)
-        backup_dir = questionary.text(
-            "Backup directory path (where duplicates will be moved):",
-            default="_backup",
-        ).ask()
-
-        if backup_dir.strip():  # If not empty
-            args.backup_dir = backup_dir.strip()
-        else:
-            args.backup_dir = "_backup"  # Fallback to default
-
-        backup_choice = questionary.select(
-            "How should duplicates be handled?",
-            choices=[
-                f"Move to {args.backup_dir} (safe)",
-                "Delete immediately (dangerous)",
-                "Preview only (no changes)",
-            ],
-            default=f"Move to {args.backup_dir} (safe)",
-        ).ask()
-
-        args.delete_duplicates = "Delete" in backup_choice
-        args.dry_run = "Preview" in backup_choice
-
-        if args.use_fuzzy:
-            # Get fuzzy matching configuration
-            threshold_choice = questionary.select(
-                "Select fuzzy matching threshold (higher = more strict):",
-                choices=[
-                    "95 - Nearly identical",
-                    "90 - Very similar",
-                    "85 - Similar",
-                    "80 - Somewhat similar",
-                ],
-                default="90 - Very similar",
-            ).ask()
-            args.fuzzy_threshold = int(threshold_choice.split()[0])
-
-        return "duplicates", args
+        return "restore", args
 
     # For sample shrinking, get all the conversion options
     args.bitdepth = questionary.select(
@@ -1591,6 +1545,126 @@ def process_file_group(group, fuzzy_groups, args, progress):
         raise
 
 
+def restore_from_backup(args):
+    """Restore files found under ``args.backup_dir`` into ``args.files[0]``.
+
+    Each file keeps its path relative to the backup root. Only names ending in
+    an extension from ``args.restore_ext`` (comma-separated; case and leading
+    dots ignored) are restored, using ``args.jobs`` worker threads.
+    """
+    console.print("\n[cyan]Starting Backup Restore Process[/cyan]")
+
+    backup_path = Path(args.backup_dir)
+    if not backup_path.exists():
+        console.print(f"[red]Error: Backup directory {backup_path} not found[/red]")
+        return
+    if not args.files:
+        console.print("[red]Error: No restore destination specified[/red]")
+        return
+    restore_root = Path(args.files[0])
+
+    # Normalise extensions: tolerate ".wav", " WAV " and empty entries
+    cleaned = {ext.strip().lower().lstrip(".") for ext in args.restore_ext.split(",")}
+    suffixes = tuple(f".{ext}" for ext in sorted(cleaned) if ext)
+
+    # Step 1: Scan backup directory
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        TextColumn("{task.completed}/{task.total} files"),
+        console=console,
+    ) as progress:
+        scan_task = progress.add_task(
+            "[magenta]Scanning backup directory...[/magenta]", total=None
+        )
+        # One traversal; names compared in lower case so "KICK.WAV" matches
+        restore_files = []
+        for file_path in backup_path.rglob("*"):
+            if file_path.name.lower().endswith(suffixes) and file_path.is_file():
+                rel_path = file_path.relative_to(backup_path)
+                restore_files.append((file_path, restore_root / rel_path))
+
+        progress.update(
+            scan_task, total=len(restore_files), completed=len(restore_files)
+        )
+
+    # Report findings
+    console.print(
+        Panel(
+            f"Found [cyan]{len(restore_files)}[/cyan] files to restore",
+            title="Backup Scan Complete",
+        )
+    )
+
+    if not restore_files:
+        return
+
+    # Step 2: Restore files
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        TextColumn("{task.completed}/{task.total} files"),
+        console=console,
+    ) as progress:
+        restore_task = progress.add_task(
+            "[green]Restoring files...[/green]", total=len(restore_files)
+        )
+
+        with ThreadPoolExecutor(max_workers=args.jobs) as executor:
+            futures = [
+                executor.submit(restore_single_file, backup_file, target_path, args)
+                for backup_file, target_path in restore_files
+            ]
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                except Exception as e:
+                    console.print(f"[red]Error during restore: {e}[/red]")
+                finally:
+                    # Count failures too so the bar always reaches 100%
+                    progress.advance(restore_task)
+
+    console.print("[green]Restore process complete![/green]")
+
+
+def restore_single_file(backup_file, target_path, args):
+    """Copy one backed-up file to ``target_path``, honouring the restore options.
+
+    An existing target is kept unless ``args.overwrite`` is set without
+    ``args.skip_existing``. With ``args.dry_run`` nothing is written and the
+    planned copy is only reported. Errors are printed and then re-raised.
+    """
+    try:
+        if args.verbose:
+            console.print(f"Processing: {backup_file} -> {target_path}")
+
+        if target_path.exists() and (args.skip_existing or not args.overwrite):
+            if args.skip_existing and args.verbose:
+                console.print(f"[yellow]Skipping existing file: {target_path}[/yellow]")
+            elif not args.skip_existing:
+                console.print(
+                    f"[yellow]Target exists (skipping): {target_path}[/yellow]"
+                )
+            return
+
+        if args.dry_run:
+            console.print(f"[cyan]Would restore: {backup_file} -> {target_path}[/cyan]")
+            return
+
+        target_path.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(backup_file, target_path)  # copy2 also preserves metadata
+        if args.verbose:
+            console.print(f"[green]Restored: {target_path}[/green]")
+
+    except Exception as e:
+        console.print(f"[red]Error restoring {backup_file}: {e}[/red]")
+        raise
+
+
 def main():
     # Check for ffmpeg first
     if not check_ffmpeg():
@@ -1607,7 +1681,9 @@ def main():
     if not args:
         return
 
-    if action == "duplicates":
+    if action == "restore":
+        restore_from_backup(args)
+    elif action == "duplicates":
         process_duplicates(args)
     else:  # Shrink samples
         # Delete all '._' files before processing anything

From 72472a8079116eb6999bb1b55126262d92dcb301 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:22:24 +0000
Subject: [PATCH 62/66] Nov 18, 2024, 12:22 AM

---
 sample-shrinker-python/sample-shrinker.py | 226 ++++++++++++++--------
 1 file changed, 143 insertions(+), 83 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 12fc519..52980d8 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -935,7 +935,7 @@ def get_interactive_config():
         "What would you like to do?",
         choices=[
             "Shrink samples (convert audio files)",
-            "Remove duplicate directories",
+            "Remove duplicate files and directories",
             "Restore from backup",
             "Exit",
         ],
@@ -971,7 +971,66 @@ def get_interactive_config():
     args = argparse.Namespace()
     args.files = paths
 
-    if action == "Restore from backup":
+    # Set default values that all modes need
+    args.dry_run = False
+    args.verbose = False
+    args.jobs = 1
+
+    if action == "Remove duplicate files and directories":
+        # For duplicate removal, get configuration options
+        duplicate_options = questionary.checkbox(
+            "Select duplicate removal options:",
+            choices=[
+                "Use fuzzy matching for similar files",
+                "Ignore filenames (match by content only)",
+                "Preview changes (dry run)",
+                "Show detailed progress",
+                "Process files in parallel",
+            ],
+        ).ask()
+
+        args.use_fuzzy = "Use fuzzy matching for similar files" in duplicate_options
+        args.ignore_names = (
+            "Ignore filenames (match by content only)" in duplicate_options
+        )
+        args.dry_run = "Preview changes (dry run)" in duplicate_options
+        args.verbose = "Show detailed progress" in duplicate_options
+
+        if "Process files in parallel" in duplicate_options:
+            args.jobs = questionary.select(
+                "How many parallel jobs?",
+                choices=["2", "4", "8", "16", "24", "32", "48", "64"],
+                default="4",
+            ).ask()
+            args.jobs = int(args.jobs)
+
+        # Get backup options
+        args.backup_dir = questionary.text(
+            "Backup directory path (where duplicates will be moved):",
+            default="_backup",
+        ).ask()
+
+        if args.backup_dir.strip():  # If not empty
+            args.backup_dir = args.backup_dir.strip()
+        else:
+            args.backup_dir = "_backup"  # Fallback to default
+
+        if args.use_fuzzy:
+            threshold_choice = questionary.select(
+                "Select fuzzy matching threshold (higher = more strict):",
+                choices=[
+                    "95 - Nearly identical",
+                    "90 - Very similar",
+                    "85 - Similar",
+                    "80 - Somewhat similar",
+                ],
+                default="90 - Very similar",
+            ).ask()
+            args.fuzzy_threshold = int(threshold_choice.split()[0])
+
+        return "duplicates", args
+
+    elif action == "Restore from backup":
         # Get backup directory
         args.backup_dir = questionary.path(
             "Select backup directory to restore from:",
@@ -1009,99 +1068,100 @@ def get_interactive_config():
                 default="4",
             ).ask()
             args.jobs = int(args.jobs)
-        else:
-            args.jobs = 1
 
         return "restore", args
 
-    # For sample shrinking, get all the conversion options
-    args.bitdepth = questionary.select(
-        "Select target bit depth:", choices=["8", "16", "24"], default="16"
-    ).ask()
-    args.bitdepth = int(args.bitdepth)
-
-    args.channels = questionary.select(
-        "Select target channels:",
-        choices=["1 (mono)", "2 (stereo)"],
-        default="2 (stereo)",
-    ).ask()
-    args.channels = 1 if "1" in args.channels else 2
-
-    args.samplerate = questionary.select(
-        "Select target sample rate:",
-        choices=["22050", "44100", "48000"],
-        default="44100",
-    ).ask()
-    args.samplerate = int(args.samplerate)
-
-    # Advanced options in a checkbox group
-    advanced_options = questionary.checkbox(
-        "Select additional options:",
-        choices=[
-            "Auto-convert stereo to mono when possible",
-            "Pre-normalize before conversion",
-            "Skip generating spectrograms",
-            "Preview changes (dry run)",
-            "Process files in parallel",
-            "Set minimum sample rate",
-            "Set minimum bit depth",
-            "Convert in place (no backups)",
-        ],
-    ).ask()
-
-    args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
-    args.pre_normalize = "Pre-normalize before conversion" in advanced_options
-    args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
-    args.dry_run = "Preview changes (dry run)" in advanced_options
-    convert_in_place = "Convert in place (no backups)" in advanced_options
-
-    # Configure backup settings if not converting in place
-    if not convert_in_place:
-        args.backup_dir = questionary.text(
-            "Backup directory path:",
-            default="_backup",
+    elif action == "Shrink samples (convert audio files)":
+        # For sample shrinking, get all the conversion options
+        args.bitdepth = questionary.select(
+            "Select target bit depth:", choices=["8", "16", "24"], default="16"
         ).ask()
-        if args.backup_dir.strip():  # If not empty
-            args.backup_dir = args.backup_dir.strip()
-            # Only ask about spectrograms if they weren't explicitly skipped in advanced options
-            if not args.skip_spectrograms:
-                args.skip_spectrograms = not questionary.confirm(
-                    "Generate spectrograms for backup comparison?", default=False
-                ).ask()
-        else:
-            args.backup_dir = "-"
-            args.skip_spectrograms = True
-
-    if "Process files in parallel" in advanced_options:
-        args.jobs = questionary.select(
-            "How many parallel jobs? (higher values may improve speed but use more memory)",
-            choices=["2", "4", "8", "16", "24", "32", "48", "64"],
-            default="4",
+        args.bitdepth = int(args.bitdepth)
+
+        args.channels = questionary.select(
+            "Select target channels:",
+            choices=["1 (mono)", "2 (stereo)"],
+            default="2 (stereo)",
         ).ask()
-        args.jobs = int(args.jobs)
+        args.channels = 1 if "1" in args.channels else 2
 
-    if "Set minimum sample rate" in advanced_options:
-        args.min_samplerate = questionary.select(
-            "Select minimum sample rate:",
+        args.samplerate = questionary.select(
+            "Select target sample rate:",
             choices=["22050", "44100", "48000"],
-            default="22050",
+            default="44100",
         ).ask()
-        args.min_samplerate = int(args.min_samplerate)
+        args.samplerate = int(args.samplerate)
 
-    if "Set minimum bit depth" in advanced_options:
-        args.min_bitdepth = questionary.select(
-            "Select minimum bit depth:", choices=["8", "16", "24"], default="16"
+        # Advanced options in a checkbox group
+        advanced_options = questionary.checkbox(
+            "Select additional options:",
+            choices=[
+                "Auto-convert stereo to mono when possible",
+                "Pre-normalize before conversion",
+                "Skip generating spectrograms",
+                "Preview changes (dry run)",
+                "Process files in parallel",
+                "Set minimum sample rate",
+                "Set minimum bit depth",
+                "Convert in place (no backups)",
+            ],
         ).ask()
-        args.min_bitdepth = int(args.min_bitdepth)
 
-    if args.auto_mono:
-        args.auto_mono_threshold = float(
-            questionary.text(
-                "Auto-mono threshold in dB (default: -95.5):", default="-95.5"
+        args.auto_mono = "Auto-convert stereo to mono when possible" in advanced_options
+        args.pre_normalize = "Pre-normalize before conversion" in advanced_options
+        args.skip_spectrograms = "Skip generating spectrograms" in advanced_options
+        args.dry_run = "Preview changes (dry run)" in advanced_options
+        convert_in_place = "Convert in place (no backups)" in advanced_options
+
+        # Configure backup settings if not converting in place
+        if not convert_in_place:
+            args.backup_dir = questionary.text(
+                "Backup directory path:",
+                default="_backup",
             ).ask()
-        )
+            if args.backup_dir.strip():  # If not empty
+                args.backup_dir = args.backup_dir.strip()
+                # Only ask about spectrograms if they weren't explicitly skipped in advanced options
+                if not args.skip_spectrograms:
+                    args.skip_spectrograms = not questionary.confirm(
+                        "Generate spectrograms for backup comparison?", default=False
+                    ).ask()
+            else:
+                args.backup_dir = "-"
+                args.skip_spectrograms = True
+
+        if "Process files in parallel" in advanced_options:
+            args.jobs = questionary.select(
+                "How many parallel jobs? (higher values may improve speed but use more memory)",
+                choices=["2", "4", "8", "16", "24", "32", "48", "64"],
+                default="4",
+            ).ask()
+            args.jobs = int(args.jobs)
+
+        if "Set minimum sample rate" in advanced_options:
+            args.min_samplerate = questionary.select(
+                "Select minimum sample rate:",
+                choices=["22050", "44100", "48000"],
+                default="22050",
+            ).ask()
+            args.min_samplerate = int(args.min_samplerate)
+
+        if "Set minimum bit depth" in advanced_options:
+            args.min_bitdepth = questionary.select(
+                "Select minimum bit depth:", choices=["8", "16", "24"], default="16"
+            ).ask()
+            args.min_bitdepth = int(args.min_bitdepth)
+
+        if args.auto_mono:
+            args.auto_mono_threshold = float(
+                questionary.text(
+                    "Auto-mono threshold in dB (default: -95.5):", default="-95.5"
+                ).ask()
+            )
+
+        return "shrink", args
 
-    return "shrink", args
+    return action.split()[0].lower(), args  # 'shrink', 'duplicates', or 'restore'
 
 
 def process_duplicates(args):
@@ -1685,7 +1745,7 @@ def main():
         restore_from_backup(args)
     elif action == "duplicates":
         process_duplicates(args)
-    else:  # Shrink samples
+    elif action == "shrink":
         # Delete all '._' files before processing anything
         for path in args.files:
             if os.path.isdir(path):

From 16de3cc25007eee732408ad6ff6d5dac4f1cce8d Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:27:52 +0000
Subject: [PATCH 63/66] Nov 18, 2024, 12:27 AM

---
 sample-shrinker-python/sample-shrinker.py | 82 ++++++++++++++++++++---
 1 file changed, 73 insertions(+), 9 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 52980d8..31ce259 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -873,8 +873,38 @@ def find_duplicate_directories(paths, progress, task_id):
     dir_map = defaultdict(list)
     scanned = 0
 
+    def get_directory_signature(dir_path):
+        """Summarise the recursive contents of ``dir_path`` for duplicate matching.
+
+        Returns a dict with ``file_count``, ``subdir_count``, ``total_size``
+        (bytes), ``structure`` (sorted relative paths of every item) and
+        ``file_sizes`` (sorted), or ``None`` if the directory cannot be read.
+        """
+        try:
+            # Get all files and subdirectories recursively
+            all_items = list(dir_path.rglob("*"))
+
+            # Read each file's size only once: the total is derived from the
+            # same numbers, so the two fields can never disagree
+            file_sizes = sorted(f.stat().st_size for f in all_items if f.is_file())
+            subdir_count = sum(1 for d in all_items if d.is_dir())
+
+            # Relative paths of all items, sorted, for structure comparison
+            rel_paths = sorted(str(item.relative_to(dir_path)) for item in all_items)
+
+            return {
+                "file_count": len(file_sizes),
+                "subdir_count": subdir_count,
+                "total_size": sum(file_sizes),
+                "structure": rel_paths,
+                "file_sizes": file_sizes,
+            }
+        except Exception as e:
+            console.print(f"[yellow]Error analyzing directory {dir_path}: {e}[/yellow]")
+            return None
+
     for path_str in paths:
-        path = Path(path_str)  # Convert string to Path
+        path = Path(path_str)
         if path.is_dir():
             for dir_path in path.rglob("*"):
                 if dir_path.is_dir():
@@ -882,16 +912,50 @@ def find_duplicate_directories(paths, progress, task_id):
                     scanned += 1
                     progress.update(task_id, completed=scanned)
 
-                    # Get directory name, file count, and total size
-                    dir_name = dir_path.name.lower()  # Case-insensitive comparison
-                    files = list(dir_path.glob("*"))
-                    file_count = len([f for f in files if f.is_file()])
-                    total_size = sum(f.stat().st_size for f in files if f.is_file())
-
-                    dir_map[(dir_name, file_count, total_size)].append(dir_path)
+                    # Get directory signature
+                    signature = get_directory_signature(dir_path)
+                    if signature:
+                        # Create a unique key combining name and content signature
+                        dir_name = dir_path.name.lower()  # Case-insensitive comparison
+                        key = (
+                            dir_name,
+                            signature["file_count"],
+                            signature["subdir_count"],
+                            signature["total_size"],
+                            tuple(signature["file_sizes"]),  # Make hashable
+                            tuple(signature["structure"]),  # Make hashable
+                        )
+                        dir_map[key].append(dir_path)
 
     # Return only directories that have duplicates
-    return {k: v for k, v in dir_map.items() if len(v) > 1}
+    duplicates = {k: v for k, v in dir_map.items() if len(v) > 1}
+
+    if duplicates:
+        # Log detailed information about matches
+        for (
+            name,
+            file_count,
+            subdir_count,
+            total_size,
+            sizes,
+            structure,
+        ), paths in duplicates.items():
+            console.print(
+                f"\n[cyan]Found potential duplicates:[/cyan]\n"
+                f"Directory name: [yellow]{name}[/yellow]\n"
+                f"File count: {file_count}\n"
+                f"Subdirectory count: {subdir_count}\n"
+                f"Total size: {total_size} bytes\n"
+                f"Structure match: {len(structure)} items"
+            )
+            if args.verbose:
+                console.print("Directory structure:")
+                for item in structure[:10]:  # Show first 10 items
+                    console.print(f"  {item}")
+                if len(structure) > 10:
+                    console.print("  ...")
+
+    return duplicates
 
 
 def process_duplicate_directories(duplicates, args):

From ab2eb22324181272a00c3d01c7e561f0ecf1cfd1 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:28:45 +0000
Subject: [PATCH 64/66] Nov 18, 2024, 12:28 AM

---
 sample-shrinker-python/sample-shrinker.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 31ce259..8484d56 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -868,7 +868,7 @@ def process_duplicate_files(duplicates, fuzzy_groups, args):
                     print(f"Error moving file {file_path}: {e}")
 
 
-def find_duplicate_directories(paths, progress, task_id):
+def find_duplicate_directories(paths, progress, task_id, args):
     """Find directories with matching names and file counts."""
     dir_map = defaultdict(list)
     scanned = 0
@@ -1252,8 +1252,10 @@ def process_duplicates(args):
             total=total_dirs,
         )
 
-        # Modify find_duplicate_directories to update progress
-        dir_duplicates = find_duplicate_directories(args.files, progress, scan_task)
+        # args lets find_duplicate_directories honor args.verbose in its report
+        dir_duplicates = find_duplicate_directories(
+            args.files, progress, scan_task, args
+        )
         progress.update(scan_task, completed=total_dirs)
 
     if dir_duplicates:

From e8a218cc752153869d51a157a0a9e336addfded1 Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:30:09 +0000
Subject: [PATCH 65/66] Nov 18, 2024, 12:30 AM

---
 sample-shrinker-python/sample-shrinker.py | 93 ++++++++++++++++++-----
 1 file changed, 73 insertions(+), 20 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 8484d56..85cc9e2 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -960,36 +960,89 @@ def get_directory_signature(dir_path):
 
 def process_duplicate_directories(duplicates, args):
     """Process duplicate directories, keeping the oldest copy."""
-    for (dir_name, file_count, total_size), paths in duplicates.items():
-        print(
-            f"\nFound duplicate directories named '{dir_name}' with {file_count} files ({total_size} bytes):"
+    for (
+        dir_name,
+        file_count,
+        subdir_count,
+        total_size,
+        sizes,
+        structure,
+    ), paths in duplicates.items():
+        console.print(
+            f"\nFound duplicate directories named '[cyan]{dir_name}[/cyan]' "
+            f"with {file_count} files, {subdir_count} subdirectories "
+            f"({total_size} bytes):"
         )
 
         # Sort paths by creation time
-        paths_with_time = [(p, p.stat().st_ctime) for p in paths]
-        paths_with_time.sort(key=lambda x: x[1])
+        valid_paths = []
+        for path in paths:
+            try:
+                stat = path.stat()
+                valid_paths.append((path, stat.st_ctime))
+            except FileNotFoundError:
+                console.print(f"[yellow]Warning: Directory not found: {path}[/yellow]")
+                continue
+
+        if not valid_paths:
+            console.print("[red]No valid paths found in group[/red]")
+            continue  # skip this group only; later groups still need processing
+
+        valid_paths.sort(key=lambda x: x[1])
 
         # Keep the oldest directory
-        original_dir = paths_with_time[0][0]
-        print(
-            f"Keeping oldest copy: {original_dir} (created: {time.ctime(paths_with_time[0][1])})"
+        original_dir = valid_paths[0][0]
+        console.print(
+            f"Keeping oldest copy: [green]{original_dir}[/green] "
+            f"(created: {time.ctime(valid_paths[0][1])})"
         )
 
         # Process newer copies
-        for dir_path, ctime in paths_with_time[1:]:
-            print(f"Moving duplicate: {dir_path} (created: {time.ctime(ctime)})")
-            if not args.dry_run:
-                # Create backup path
-                rel_path = dir_path.relative_to(dir_path.parent.parent)
-                backup_path = Path(args.backup_dir) / rel_path
+        for dir_path, ctime in valid_paths[1:]:
+            try:
+                if not dir_path.exists():
+                    console.print(
+                        f"[yellow]Warning: Directory disappeared: {dir_path}[/yellow]"
+                    )
+                    continue
 
-                # Ensure backup directory exists
-                backup_path.parent.mkdir(parents=True, exist_ok=True)
+                console.print(
+                    f"Moving duplicate: [yellow]{dir_path}[/yellow] "
+                    f"(created: {time.ctime(ctime)})"
+                )
 
-                try:
-                    shutil.move(str(dir_path), str(backup_path))
-                except Exception as e:
-                    print(f"Error moving directory {dir_path}: {e}")
+                if not args.dry_run:
+                    try:
+                        # Backup path keeps the last two components: <parent>/<dir>
+                        rel_path = dir_path.relative_to(dir_path.parent.parent)
+                        backup_path = Path(args.backup_dir) / rel_path
+
+                        # shutil.move needs the destination's parent to exist
+                        backup_path.parent.mkdir(parents=True, exist_ok=True)
+
+                        if backup_path.exists():
+                            console.print(
+                                f"[yellow]Warning: Backup path already exists: {backup_path}[/yellow]"
+                            )
+                            # Try <name>_1, <name>_2, ... (suffix the original name)
+                            counter, base_name = 1, backup_path.name
+                            while backup_path.exists():
+                                new_name = f"{base_name}_{counter}"
+                                backup_path = backup_path.parent / new_name
+                                counter += 1
+                            console.print(
+                                f"[blue]Using alternate path: {backup_path}[/blue]"
+                            )
+
+                        shutil.move(str(dir_path), str(backup_path))
+                    except Exception as e:
+                        console.print(
+                            f"[red]Error moving directory {dir_path}: {e}[/red]"
+                        )
+
+            except Exception as e:
+                console.print(f"[red]Error processing directory {dir_path}: {e}[/red]")
+                continue
 
 
 def get_interactive_config():

From f9566177625a03248f56306e4e6270e866153dab Mon Sep 17 00:00:00 2001
From: Chris Farrell <chris@thefarrells.nyc>
Date: Mon, 18 Nov 2024 08:34:36 +0000
Subject: [PATCH 66/66] Nov 18, 2024, 12:34 AM

---
 sample-shrinker-python/sample-shrinker.py | 26 ++---------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/sample-shrinker-python/sample-shrinker.py b/sample-shrinker-python/sample-shrinker.py
index 85cc9e2..d39f7a8 100644
--- a/sample-shrinker-python/sample-shrinker.py
+++ b/sample-shrinker-python/sample-shrinker.py
@@ -1331,30 +1331,8 @@ def process_duplicates(args):
                 dir_task = progress.add_task(
                     "[green]Processing directories...", total=count
                 )
-                with ThreadPoolExecutor(max_workers=args.jobs) as executor:
-                    futures = []
-                    for (
-                        dir_name,
-                        file_count,
-                        total_size,
-                    ), paths in dir_duplicates.items():
-                        future = executor.submit(
-                            process_directory_group,
-                            dir_name,
-                            file_count,
-                            total_size,
-                            paths,
-                            args,
-                            progress,
-                        )
-                        futures.append(future)
-
-                    for future in as_completed(futures):
-                        try:
-                            future.result()
-                            progress.advance(dir_task)
-                        except Exception as e:
-                            console.print(f"[red]Error processing directory: {e}[/red]")
+                process_duplicate_directories(dir_duplicates, args)
+                progress.update(dir_task, completed=count)
 
     # Phase 2: File scan - Compare individual files
     console.print("\n[cyan]Phase 2: Individual File Analysis[/cyan]")