From 0d7f6a19994c45464db57db30a20ace2c6752d23 Mon Sep 17 00:00:00 2001
From: nilsk <nils.kruthoff@iubh-fernstudium.de>
Date: Mon, 16 Jun 2025 14:31:24 +0200
Subject: [PATCH 1/3] added `ImageHandlingMode` to the `ParserConfig`

---
 README.md            | 20 ++++++++++++++------
 src/parser_config.rs | 39 +++++++++++++++++++++++++++++++++------
 2 files changed, 47 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 181c2e6..b916ffc 100644
--- a/README.md
+++ b/README.md
@@ -62,12 +62,20 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 ## Config Parameters
 
-| Parameter | Type   | Default | Description                                                                                                                                                |
-|-----------|--------|-------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `extract_images` | `bool` | `true` | Whether images are extracted from slides or not                                                                                                            |
-| `compress_images` | `bool` | `true` | Whether images are compressed before encoding or not                                                                                                       |
-| `image_quality` | `u8`   | `80`  | Defines the image compression quality `(0-100)`. Higher values mean better quality but larger file sizes.                                                  |
-
+| Parameter                | Type                  | Default       | Description                                                                                               |
+|--------------------------|-----------------------|---------------|-----------------------------------------------------------------------------------------------------------|
+| `extract_images`         | `bool`                | `true`        | Whether images are extracted from slides or not                                                           |
+| `compress_images`        | `bool`                | `true`        | Whether images are compressed before encoding or not                                                      |
+| `image_quality`          | `u8`                  | `80`          | Defines the image compression quality `(0-100)`. Higher values mean better quality but larger file sizes. |
+| `image_handling_mode`    | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export                                                   |      
+<br/>
+
+#### Member of `ImageHandlingMode`
+| Member                | Description                                                                                           |
+|-----------------------|-------------------------------------------------------------------------------------------------------|
+| `InMarkdown`          | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)  |            
+| `ManuallyMarkdown`    | Image handling is delegated to the user, requiring manual copying or referencing (as `base64`)        |            
+| `ManuallyRaw`         | Image handling is delegated to the user, requiring manual copying or referencing (as raw `binary`)    |
 ---
 
 ## 🏗 Project Structure
diff --git a/src/parser_config.rs b/src/parser_config.rs
index 53eef57..7398af9 100644
--- a/src/parser_config.rs
+++ b/src/parser_config.rs
@@ -1,15 +1,32 @@
-﻿/// Configuration options for the PPTX parser.
+﻿/// Determines how images are handled during content export.
+///
+/// # Members
+///
+/// | Member                | Description                                                                                           |
+/// |-----------------------|-------------------------------------------------------------------------------------------------------|            
+/// | `InMarkdown`          | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)  |            
+/// | `ManuallyMarkdown`    | Image handling is delegated to the user, requiring manual copying or referencing (as `base64`)        |            
+/// | `ManuallyRaw`         | Image handling is delegated to the user, requiring manual copying or referencing (as raw `binary`)    |            
+#[derive(Debug, Clone)]
+pub enum ImageHandlingMode {
+    InMarkdown,
+    ManuallyMarkdown,
+    ManuallyRaw,
+}
+
+/// Configuration options for the PPTX parser.
 ///
 /// Use [`ParserConfig::builder()`] to create a configuration instance.
 /// This allows you to customize only the desired fields while falling back to sensible defaults for the rest.
 ///
 /// # Configuration Options
 /// 
-/// | Parameter | Type | Default | Description |
-/// |-----------|------|---------|-------------|
-/// | `extract_images` | `bool` | `true` | Whether images are extracted from slides or not |
-/// | `compress_images` | `bool` | `true` | Whether images are compressed before encoding or not |
-/// | `image_quality` | `u8` | `80` | Compression level (0-100);<br/> higher values retain more detail but increase file size |
+/// | Parameter                 | Type                  | Default       | Description                                                                               |
+/// |---------------------------|-----------------------|---------------|-------------------------------------------------------------------------------------------|
+/// | `extract_images`          | `bool`                | `true`        | Whether images are extracted from slides or not                                           |
+/// | `compress_images`         | `bool`                | `true`        | Whether images are compressed before encoding or not                                      |
+/// | `image_quality`           | `u8`                  | `80`          | Compression level (0-100);<br/> higher values retain more detail but increase file size   |
+/// | `image_handling_mode`     | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export.                                  |
 ///
 /// # Example
 ///
@@ -25,6 +42,7 @@ pub struct ParserConfig {
     pub extract_images: bool,
     pub compress_images: bool,
     pub quality: u8,
+    pub image_handling_mode: ImageHandlingMode,
 }
 
 impl Default for ParserConfig {
@@ -33,6 +51,7 @@ impl Default for ParserConfig {
             extract_images: true,
             compress_images: true,
             quality: 80,
+            image_handling_mode: ImageHandlingMode::InMarkdown,
         }
     }
 }
@@ -51,6 +70,7 @@ pub struct ParserConfigBuilder {
     extract_images: Option<bool>,
     compress_images: Option<bool>,
     image_quality: Option<u8>,
+    image_handling_mode: Option<ImageHandlingMode>,
 }
 
 impl ParserConfigBuilder {
@@ -73,6 +93,12 @@ impl ParserConfigBuilder {
         self
     }
     
+    /// Specifies how images are handled after they are extracted
+    pub fn image_handling_mode(mut self, value: ImageHandlingMode) -> Self {
+        self.image_handling_mode = Some(value);
+        self
+    }
+    
 
     /// Builds the final [`ParserConfig`] instance, applying default values for any fields that were not set.
     pub fn build(self) -> ParserConfig {
@@ -80,6 +106,7 @@ impl ParserConfigBuilder {
             extract_images: self.extract_images.unwrap_or(true),
             compress_images: self.compress_images.unwrap_or(true),
             quality: self.image_quality.unwrap_or(80),
+            image_handling_mode: self.image_handling_mode.unwrap_or(ImageHandlingMode::InMarkdown),
         }
     }
 }
\ No newline at end of file

From c1f87164f33b8afd10ae7859a0d63b369b26e3bb Mon Sep 17 00:00:00 2001
From: nilsk <nils.kruthoff@iubh-fernstudium.de>
Date: Mon, 16 Jun 2025 14:37:55 +0200
Subject: [PATCH 2/3] applied unrelated `clippy` refactoring suggestions

---
 src/slide.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/slide.rs b/src/slide.rs
index 81bbff2..f34e3b8 100644
--- a/src/slide.rs
+++ b/src/slide.rs
@@ -119,10 +119,10 @@ impl Slide {
                             counters.resize(level + 1, 0);
                         }
 
-                        if level > previous_level {
-                            counters[level] = 0;
-                        } else if level < previous_level {
-                            counters.truncate(level + 1);
+                        match level.cmp(&previous_level) {
+                            std::cmp::Ordering::Greater => counters[level] = 0,
+                            std::cmp::Ordering::Less => counters.truncate(level + 1),
+                            std::cmp::Ordering::Equal => {}
                         }
 
                         counters[level] += 1;
@@ -296,7 +296,7 @@ mod tests {
 
         let raw_image = load_image_data("example-image.jpg");
 
-        if let Some(compression_result) = slide.compress_image(&*raw_image) {
+        if let Some(compression_result) = slide.compress_image(&raw_image) {
             assert!(compression_result.len() < raw_image.len());
         } else {
             panic!("Compression failed");
@@ -308,7 +308,7 @@ mod tests {
         let slide = mock_slide();
         let raw_image = load_image_data("example-image.jpg");
 
-        if let Some(compression_result) = slide.compress_image(&*raw_image) {
+        if let Some(compression_result) = slide.compress_image(&raw_image) {
             let result = image::load_from_memory(&compression_result);
             assert!(result.is_ok());
         } else {

From 8145e6288b409b347a49a62c98079d26f9246a8e Mon Sep 17 00:00:00 2001
From: nilsk <nils.kruthoff@iubh-fernstudium.de>
Date: Mon, 16 Jun 2025 16:11:45 +0200
Subject: [PATCH 3/3] included logic to handle images manually

---
 CHANGELOG.md                        | 16 +++++
 README.md                           | 13 ++--
 examples/basic_usage.rs             |  5 +-
 examples/image_extraction.rs        | 11 ++--
 examples/manual_image_extraction.rs | 95 +++++++++++++++++++++++++++++
 src/lib.rs                          |  2 +-
 src/parser_config.rs                | 26 ++++----
 src/slide.rs                        | 51 +++++++++++++++-
 src/types.rs                        |  2 +-
 9 files changed, 189 insertions(+), 32 deletions(-)
 create mode 100644 examples/manual_image_extraction.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bacc1cc..90a2dfe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0] - _unreleased_
+
+### Added
+
+- Reworked the extraction of images by adding `ImageHandlingMode` to the `ParserConfig`. With this, users can decide to manually extract images and handle the logic [(#19)](https://github.com/nilskruthoff/pptx-parser/issues/19)
+- New [example](https://github.com/nilskruthoff/pptx-parser/tree/master/examples) `manual_image_extraction.rs` to show how to handle images manually
+- `ManualImage` struct to encapsulate the data and metadata of images
+
+### Removed
+
+- `image_extraction` from [examples](https://github.com/nilskruthoff/pptx-parser/tree/master/examples) directory (replaced by `manual_image_extraction.rs`)
+
+### Changed
+
+---
+
 ## [0.2.0] - 2025-06-15
 
 ### Added
diff --git a/README.md b/README.md
index b916ffc..8804b3f 100644
--- a/README.md
+++ b/README.md
@@ -64,18 +64,17 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 | Parameter                | Type                  | Default       | Description                                                                                               |
 |--------------------------|-----------------------|---------------|-----------------------------------------------------------------------------------------------------------|
-| `extract_images`         | `bool`                | `true`        | Whether images are extracted from slides or not                                                           |
-| `compress_images`        | `bool`                | `true`        | Whether images are compressed before encoding or not                                                      |
+| `extract_images`         | `bool`                | `true`        | Whether images are extracted from slides or not. If false, images cannot be extracted manually either.    |
+| `compress_images`        | `bool`                | `true`        | Whether images are compressed before encoding or not. Affects manually extracted images too.              |
 | `image_quality`          | `u8`                  | `80`          | Defines the image compression quality `(0-100)`. Higher values mean better quality but larger file sizes. |
 | `image_handling_mode`    | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export                                                   |      
 <br/>
 
 #### Member of `ImageHandlingMode`
-| Member                | Description                                                                                           |
-|-----------------------|-------------------------------------------------------------------------------------------------------|
-| `InMarkdown`          | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)  |            
-| `ManuallyMarkdown`    | Image handling is delegated to the user, requiring manual copying or referencing (as `base64`)        |            
-| `ManuallyRaw`         | Image handling is delegated to the user, requiring manual copying or referencing (as raw `binary`)    |
+| Member          | Description                                                                                           |
+|-----------------|-------------------------------------------------------------------------------------------------------|
+| `InMarkdown`    | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)  |            
+| `Manually`      | Image handling is delegated to the user, requiring manual copying or referencing (as `base64`)        |            
 ---
 
 ## 🏗 Project Structure
diff --git a/examples/basic_usage.rs b/examples/basic_usage.rs
index 88e134c..903c4ee 100644
--- a/examples/basic_usage.rs
+++ b/examples/basic_usage.rs
@@ -4,7 +4,7 @@
 //!
 //! Run with: cargo run --example basic_usage <path/to/your/presentation.pptx>
 
-use pptx_to_md::{PptxContainer, Result, ParserConfig};
+use pptx_to_md::{PptxContainer, Result, ParserConfig, ImageHandlingMode};
 use std::env;
 use std::fs::File;
 use std::io::Write;
@@ -25,6 +25,9 @@ fn main() -> Result<()> {
     // Use the config builder to build your config
     let config = ParserConfig::builder()
         .extract_images(true)
+        .compress_images(true)
+        .quality(75)
+        .image_handling_mode(ImageHandlingMode::InMarkdown)
         .build();
     
     // Open the PPTX file
diff --git a/examples/image_extraction.rs b/examples/image_extraction.rs
index 3319f7c..17a9c2d 100644
--- a/examples/image_extraction.rs
+++ b/examples/image_extraction.rs
@@ -66,13 +66,10 @@ fn main() -> Result<()> {
                                 ext
                             );
 
-                            match image_data {
-                                Some(image_data) => {
-                                    fs::write(&output_path, image_data)?;
-                                    println!("Saved image to {}", output_path);
-                                    image_count += 1;
-                                },
-                                None => {}
+                            if let Some(image_data) = image_data {
+                                fs::write(&output_path, image_data)?;
+                                println!("Saved image to {}", output_path);
+                                image_count += 1;
                             }
                         }
                     }
diff --git a/examples/manual_image_extraction.rs b/examples/manual_image_extraction.rs
new file mode 100644
index 0000000..1e8e4b7
--- /dev/null
+++ b/examples/manual_image_extraction.rs
@@ -0,0 +1,95 @@
+﻿//! Basic usage example for the pptx-to-md crate
+//!
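+//! This example demonstrates how to open a PPTX file, convert all slides to Markdown, and extract the images manually (saving each one to disk and embedding it as base64 in the Markdown output).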
+//!
+//! Run with: cargo run --example manual_image_extraction <path/to/your/presentation.pptx>
+
+use pptx_to_md::{PptxContainer, Result, ParserConfig, ImageHandlingMode};
+use std::{env, fs};
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+use base64::Engine;
+use base64::engine::general_purpose;
+
+fn main() -> Result<()> {
+    // Get the PPTX file path from command line arguments
+    let args: Vec<String> = env::args().collect();
+    let pptx_path = if args.len() > 1 {
+        &args[1]
+    } else {
+        eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
+        return Ok(());
+    };
+
+    println!("Processing PPTX file: {}", pptx_path);
+
+    // Use the config builder to build your config
+    let config = ParserConfig::builder()
+        .extract_images(true)
+        .compress_images(true)
+        .quality(75)
+        .image_handling_mode(ImageHandlingMode::Manually)
+        .build();
+
+    // Open the PPTX file
+    let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
+
+    // Parse all slides
+    let slides = container.parse_all()?;
+
+    println!("Found {} slides", slides.len());
+
+    // create a new Markdown file
+    let mut md_file = File::create("output.md")?;
+
+    // Create output directory
+    let output_dir = "extracted_images";
+    fs::create_dir_all(output_dir)?;
+
+    // Running counter used to give every saved image a unique file name
+    let mut image_count = 1;
+
+    // Convert each slide to Markdown and save
+    for slide in slides {
+        if let Some(md_content) = slide.convert_to_md() {
+            writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
+        }
+        
+        // Manually load the base64 encoded image strings from the slide
+        if let Some(images) = slide.load_images_manually() {
+            for image in images {
+                
+                // Decode the base64 strings back to raw image data
+                let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
+
+                // Use the original image extension if the image is not compressed; otherwise it's always `.jpg`
+                let ext = slide.config.compress_images
+                    .then(|| "jpg".to_string())
+                    .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
+
+                // Construct a unique file name
+                let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
+                
+                // Save the image
+                let output_path = format!(
+                    "{}/{}.{}",
+                    output_dir,
+                    &file_name,
+                    ext
+                );
+                fs::write(&output_path, image_data)?;
+                println!("Saved image to {}", output_path);
+
+                // Write the image data into the Markdown file
+                writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
+                
+                image_count += 1;
+            }
+        }
+    }
+
+    println!("All slides converted successfully!");
+
+    Ok(())
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index 80293e4..a17a9a2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,7 +7,7 @@ pub mod parse_rels;
 mod parser_config;
 
 pub use container::PptxContainer;
-pub use parser_config::ParserConfig;
+pub use parser_config::{ParserConfig, ImageHandlingMode};
 pub use slide::Slide;
 pub use types::*;
 
diff --git a/src/parser_config.rs b/src/parser_config.rs
index 7398af9..d4ad65d 100644
--- a/src/parser_config.rs
+++ b/src/parser_config.rs
@@ -2,16 +2,14 @@
 ///
 /// # Members
 ///
-/// | Member                | Description                                                                                           |
-/// |-----------------------|-------------------------------------------------------------------------------------------------------|            
-/// | `InMarkdown`          | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)  |            
-/// | `ManuallyMarkdown`    | Image handling is delegated to the user, requiring manual copying or referencing (as `base64`)        |            
-/// | `ManuallyRaw`         | Image handling is delegated to the user, requiring manual copying or referencing (as raw `binary`)    |            
-#[derive(Debug, Clone)]
+/// | Member                | Description                                                                                                           |
+/// |-----------------------|-----------------------------------------------------------------------------------------------------------------------|            
+/// | `InMarkdown`          | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)                  |            
+/// | `Manually`            | Image handling is delegated to the user, requiring manual copying or referencing (as `base64` encoded string)         |            
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ImageHandlingMode {
     InMarkdown,
-    ManuallyMarkdown,
-    ManuallyRaw,
+    Manually
 }
 
 /// Configuration options for the PPTX parser.
@@ -21,12 +19,12 @@ pub enum ImageHandlingMode {
 ///
 /// # Configuration Options
 /// 
-/// | Parameter                 | Type                  | Default       | Description                                                                               |
-/// |---------------------------|-----------------------|---------------|-------------------------------------------------------------------------------------------|
-/// | `extract_images`          | `bool`                | `true`        | Whether images are extracted from slides or not                                           |
-/// | `compress_images`         | `bool`                | `true`        | Whether images are compressed before encoding or not                                      |
-/// | `image_quality`           | `u8`                  | `80`          | Compression level (0-100);<br/> higher values retain more detail but increase file size   |
-/// | `image_handling_mode`     | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export.                                  |
+/// | Parameter                 | Type                  | Default       | Description                                                                                               |
+/// |---------------------------|-----------------------|---------------|-----------------------------------------------------------------------------------------------------------|
+/// | `extract_images`          | `bool`                | `true`        | Whether images are extracted from slides or not. If false, images cannot be extracted manually either.    |
+/// | `compress_images`         | `bool`                | `true`        | Whether images are compressed before encoding or not. Affects manually extracted images too.              |
+/// | `image_quality`           | `u8`                  | `80`          | Compression level (0-100);<br/> higher values retain more detail but increase file size                   |
+/// | `image_handling_mode`     | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export.                                                  |
 ///
 /// # Example
 ///
diff --git a/src/slide.rs b/src/slide.rs
index f34e3b8..85384da 100644
--- a/src/slide.rs
+++ b/src/slide.rs
@@ -4,6 +4,23 @@ use std::collections::HashMap;
 use std::io::Cursor;
 use std::path::Path;
 use image::ImageOutputFormat;
+use crate::parser_config::ImageHandlingMode;
+
+/// A base64-encoded image paired with its [`ImageReference`], used when images are extracted manually from a slide
+#[derive(Debug)]
+pub struct ManualImage {
+    pub base64_content: String,
+    pub img_ref: ImageReference,
+}
+
+impl ManualImage {
+    pub fn new(base64_content: String, img_ref: ImageReference) -> ManualImage {
+        Self {
+            base64_content,
+            img_ref,
+        }
+    }
+}
 
 /// Represents a single slide extracted from a PowerPoint (pptx) file.
 ///
@@ -91,10 +108,12 @@ impl Slide {
                     slide_txt.push('\n');
                 },
                 SlideElement::Image(image_ref) => {
+                    if self.config.image_handling_mode != ImageHandlingMode::InMarkdown { slide_txt.push('\n'); continue; }
+                    
                     if let Some(image_data) = self.image_data.get(&image_ref.id) {
                         let image_data = self.config.compress_images
                             .then(|| self.compress_image(image_data))
-                            .unwrap_or(Option::from(image_data.clone()));
+                            .unwrap_or_else(|| Option::from(image_data.clone()));
 
                         let base64_string = general_purpose::STANDARD.encode(image_data?);
                         let image_name = &image_ref.target.split('/').last()?;
@@ -226,6 +245,36 @@ impl Slide {
             None
         }
     }
+    
+    pub fn load_images_manually(&self) -> Option<Vec<ManualImage>> {
+        let mut images: Vec<ManualImage> = Vec::new();
+        
+        let image_refs: Vec<&ImageReference> = self.elements
+            .iter()
+            .filter_map(|element| match element {
+                SlideElement::Image(ref img) => Some(img),
+                _ => None,
+            })
+            .collect();
+        
+        for image_ref in image_refs {
+            if let Some(image_data) = self.image_data.get(&image_ref.id) {
+                let image_data = self.config.compress_images
+                    .then( | | self.compress_image(image_data))
+                    .unwrap_or_else(|| Option::from(image_data.clone()));
+
+                let base64_str = general_purpose::STANDARD.encode(image_data?);
+                
+                let image = ManualImage::new(
+                    base64_str,
+                    image_ref.clone(),
+                );
+                images.push(image);
+            }
+        }
+        
+        Some(images)
+    }
 }
 
 #[cfg(test)]
diff --git a/src/types.rs b/src/types.rs
index 12bf583..cc90445 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -17,7 +17,7 @@ pub enum SlideElement {
     Unknown,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct ImageReference {
     pub id: String,
     pub target: String,