nilskruthoff · nilskruthoff · Jun 16, 2025 · Jun 16, 2025 · Jun 16, 2025 · Jun 16, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0] - _unreleased_
+
+### Added
+
+- Reworked the extraction of images by adding `ImageHandlingMode` to the `ParserConfig`. With this, users can decide to manually extract images and handle the logic [(#19)](https://github.com/nilskruthoff/pptx-parser/issues/19)
+- New [example](https://github.com/nilskruthoff/pptx-parser/tree/master/examples) `manual_image_extraction.rs` to show how to handle images manually
+- `ManualImage` struct to encapsulate data and metadata of images
+
+### Removed
+
+- `image_extraction` from [examples](https://github.com/nilskruthoff/pptx-parser/tree/master/examples) directory (replaced by `manual_image_extraction.rs`)
+
+### Changed
+
+---
+
 ## [0.2.0] - 2025-06-15
 
 ### Added

diff --git a/README.md b/README.md
@@ -62,12 +62,19 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
 ## Config Parameters
 
-| Parameter | Type   | Default | Description                                                                                                                                                |
-|-----------|--------|-------|------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `extract_images` | `bool` | `true` | Whether images are extracted from slides or not                                                                                                            |
-| `compress_images` | `bool` | `true` | Whether images are compressed before encoding or not                                                                                                       |
-| `image_quality` | `u8`   | `80`  | Defines the image compression quality `(0-100)`. Higher values mean better quality but larger file sizes.                                                  |
-
+| Parameter                | Type                  | Default       | Description                                                                                               |
+|--------------------------|-----------------------|---------------|-----------------------------------------------------------------------------------------------------------|
+| `extract_images`         | `bool`                | `true`        | Whether images are extracted from slides or not. If false, images cannot be extracted manually either.    |
+| `compress_images`        | `bool`                | `true`        | Whether images are compressed before encoding or not. Affects manually extracted images too.              |
+| `image_quality`          | `u8`                  | `80`          | Defines the image compression quality `(0-100)`. Higher values mean better quality but larger file sizes. |
+| `image_handling_mode`    | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export                                                   |      
+<br/>
+
+#### Members of `ImageHandlingMode`
+| Member          | Description                                                                                           |
+|-----------------|-------------------------------------------------------------------------------------------------------|
+| `InMarkdown`    | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)  |            
+| `Manually`      | Image handling is delegated to the user, requiring manual copying or referencing (as `base64`)        |            
 ---
 
 ## 🏗 Project Structure

diff --git a/examples/basic_usage.rs b/examples/basic_usage.rs
@@ -4,7 +4,7 @@
 //!
 //! Run with: cargo run --example basic_usage <path/to/your/presentation.pptx>
 
-use pptx_to_md::{PptxContainer, Result, ParserConfig};
+use pptx_to_md::{PptxContainer, Result, ParserConfig, ImageHandlingMode};
 use std::env;
 use std::fs::File;
 use std::io::Write;
@@ -25,6 +25,9 @@ fn main() -> Result<()> {
     // Use the config builder to build your config
     let config = ParserConfig::builder()
         .extract_images(true)
+        .compress_images(true)
+        .quality(75)
+        .image_handling_mode(ImageHandlingMode::InMarkdown)
         .build();
 
     // Open the PPTX file

diff --git a/examples/image_extraction.rs b/examples/image_extraction.rs
@@ -66,13 +66,10 @@ fn main() -> Result<()> {
                                 ext
                             );
 
-                            match image_data {
-                                Some(image_data) => {
-                                    fs::write(&output_path, image_data)?;
-                                    println!("Saved image to {}", output_path);
-                                    image_count += 1;
-                                },
-                                None => {}
+                            if let Some(image_data) = image_data {
+                                fs::write(&output_path, image_data)?;
+                                println!("Saved image to {}", output_path);
+                                image_count += 1;
                             }
                         }
                     }

diff --git a/examples/manual_image_extraction.rs b/examples/manual_image_extraction.rs
@@ -0,0 +1,95 @@
+//! Manual image extraction example for the pptx-to-md crate
+//!
+//! This example demonstrates how to convert all slides of a PPTX file to Markdown while loading the images manually and saving them to disk.
+//!
+//! Run with: cargo run --example manual_image_extraction <path/to/your/presentation.pptx>
+
+use pptx_to_md::{PptxContainer, Result, ParserConfig, ImageHandlingMode};
+use std::{env, fs};
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+use base64::Engine;
+use base64::engine::general_purpose;
+
+fn main() -> Result<()> {
+    // Get the PPTX file path from command line arguments
+    let args: Vec<String> = env::args().collect();
+    let pptx_path = if args.len() > 1 {
+        &args[1]
+    } else {
+        eprintln!("Usage: cargo run --example manual_image_extraction <path/to/presentation.pptx>");
+        return Ok(());
+    };
+
+    println!("Processing PPTX file: {}", pptx_path);
+
+    // Use the config builder to build your config
+    let config = ParserConfig::builder()
+        .extract_images(true)
+        .compress_images(true)
+        .quality(75)
+        .image_handling_mode(ImageHandlingMode::Manually)
+        .build();
+
+    // Open the PPTX file
+    let mut container = PptxContainer::open(Path::new(pptx_path), config)?;
+
+    // Parse all slides
+    let slides = container.parse_all()?;
+
+    println!("Found {} slides", slides.len());
+
+    // create a new Markdown file
+    let mut md_file = File::create("output.md")?;
+
+    // Create output directory
+    let output_dir = "extracted_images";
+    fs::create_dir_all(output_dir)?;
+
+    // Running counter used to build unique image file names
+    let mut image_count = 1;
+
+    // Convert each slide to Markdown and save
+    for slide in slides {
+        if let Some(md_content) = slide.convert_to_md() {
+            writeln!(md_file, "{}", md_content).expect("Couldn't write to file");
+        }
+
+        // Manually load the base64 encoded image strings from the slide
+        if let Some(images) = slide.load_images_manually() {
+            for image in images {
+
+                // Decode the base64 strings back to raw image data
+                let image_data = general_purpose::STANDARD.decode(image.base64_content.clone()).unwrap();
+
+                // Extract image extension if the image is not compressed, otherwise it's always `.jpg`
+                let ext = slide.config.compress_images
+                    .then(|| "jpg".to_string())
+                    .unwrap_or_else(|| slide.get_image_extension(&image.img_ref.target.clone()));
+
+                // Construct a unique file name
+                let file_name = format!("slide{}_image{}_{}", slide.slide_number, image_count, &image.img_ref.id);
+
+                // Save the image
+                let output_path = format!(
+                    "{}/{}.{}",
+                    output_dir,
+                    &file_name,
+                    ext
+                );
+                fs::write(&output_path, image_data)?;
+                println!("Saved image to {}", output_path);
+
+                // Write the image data into the Markdown file
+                writeln!(md_file, "![{}](data:image/{};base64,{})", file_name, ext, image.base64_content).expect("Couldn't write to file");
+
+                image_count += 1;
+            }
+        }
+    }
+
+    println!("All slides converted successfully!");
+
+    Ok(())
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -7,7 +7,7 @@ pub mod parse_rels;
 mod parser_config;
 
 pub use container::PptxContainer;
-pub use parser_config::ParserConfig;
+pub use parser_config::{ParserConfig, ImageHandlingMode};
 pub use slide::Slide;
 pub use types::*;
 

diff --git a/src/parser_config.rs b/src/parser_config.rs
@@ -1,15 +1,30 @@
-/// Configuration options for the PPTX parser.
+/// Determines how images are handled during content export.
+///
+/// # Members
+///
+/// | Member                | Description                                                                                                           |
+/// |-----------------------|-----------------------------------------------------------------------------------------------------------------------|            
+/// | `InMarkdown`          | Images are embedded directly in the Markdown output using standard syntax as `base64` data (`![]()`)                  |            
+/// | `Manually`            | Image handling is delegated to the user, requiring manual copying or referencing (as `base64` encoded string)         |            
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ImageHandlingMode {
+    InMarkdown,
+    Manually
+}
+
+/// Configuration options for the PPTX parser.
 ///
 /// Use [`ParserConfig::builder()`] to create a configuration instance.
 /// This allows you to customize only the desired fields while falling back to sensible defaults for the rest.
 ///
 /// # Configuration Options
 /// 
-/// | Parameter | Type | Default | Description |
-/// |-----------|------|---------|-------------|
-/// | `extract_images` | `bool` | `true` | Whether images are extracted from slides or not |
-/// | `compress_images` | `bool` | `true` | Whether images are compressed before encoding or not |
-/// | `image_quality` | `u8` | `80` | Compression level (0-100);<br/> higher values retain more detail but increase file size |
+/// | Parameter                 | Type                  | Default       | Description                                                                                               |
+/// |---------------------------|-----------------------|---------------|-----------------------------------------------------------------------------------------------------------|
+/// | `extract_images`          | `bool`                | `true`        | Whether images are extracted from slides or not. If false, images cannot be extracted manually either.    |
+/// | `compress_images`         | `bool`                | `true`        | Whether images are compressed before encoding or not. Affects manually extracted images too.              |
+/// | `image_quality`           | `u8`                  | `80`          | Compression level (0-100);<br/> higher values retain more detail but increase file size                   |
+/// | `image_handling_mode`     | `ImageHandlingMode`   | `InMarkdown`  | Determines how images are handled during content export.                                                  |
 ///
 /// # Example
 ///
@@ -25,6 +40,7 @@ pub struct ParserConfig {
     pub extract_images: bool,
     pub compress_images: bool,
     pub quality: u8,
+    pub image_handling_mode: ImageHandlingMode,
 }
 
 impl Default for ParserConfig {
@@ -33,6 +49,7 @@ impl Default for ParserConfig {
             extract_images: true,
             compress_images: true,
             quality: 80,
+            image_handling_mode: ImageHandlingMode::InMarkdown,
         }
     }
 }
@@ -51,6 +68,7 @@ pub struct ParserConfigBuilder {
     extract_images: Option<bool>,
     compress_images: Option<bool>,
     image_quality: Option<u8>,
+    image_handling_mode: Option<ImageHandlingMode>,
 }
 
 impl ParserConfigBuilder {
@@ -73,13 +91,20 @@ impl ParserConfigBuilder {
         self
     }
 
+    /// Specifies the mode for processing the image after it's extracted
+    pub fn image_handling_mode(mut self, value: ImageHandlingMode) -> Self {
+        self.image_handling_mode = Some(value);
+        self
+    }
+
 
     /// Builds the final [`ParserConfig`] instance, applying default values for any fields that were not set.
     pub fn build(self) -> ParserConfig {
         ParserConfig {
             extract_images: self.extract_images.unwrap_or(true),
             compress_images: self.compress_images.unwrap_or(true),
             quality: self.image_quality.unwrap_or(80),
+            image_handling_mode: self.image_handling_mode.unwrap_or(ImageHandlingMode::InMarkdown),
         }
     }
 }
diff --git a/src/slide.rs b/src/slide.rs
@@ -4,6 +4,23 @@ use std::collections::HashMap;
 use std::io::Cursor;
 use std::path::Path;
 use image::ImageOutputFormat;
+use crate::parser_config::ImageHandlingMode;
+
+/// Encapsulates the base64 content and the reference of an image for manual extraction from slides
+#[derive(Debug)]
+pub struct ManualImage {
+    pub base64_content: String,
+    pub img_ref: ImageReference,
+}
+
+impl ManualImage {
+    pub fn new(base64_content: String, img_ref: ImageReference) -> ManualImage {
+        Self {
+            base64_content,
+            img_ref,
+        }
+    }
+}
 
 /// Represents a single slide extracted from a PowerPoint (pptx) file.
 ///
@@ -91,10 +108,12 @@ impl Slide {
                     slide_txt.push('\n');
                 },
                 SlideElement::Image(image_ref) => {
+                    if self.config.image_handling_mode != ImageHandlingMode::InMarkdown { slide_txt.push('\n'); continue; }
+
                     if let Some(image_data) = self.image_data.get(&image_ref.id) {
                         let image_data = self.config.compress_images
                             .then(|| self.compress_image(image_data))
-                            .unwrap_or(Option::from(image_data.clone()));
+                            .unwrap_or_else(|| Option::from(image_data.clone()));
 
                         let base64_string = general_purpose::STANDARD.encode(image_data?);
                         let image_name = &image_ref.target.split('/').last()?;
@@ -119,10 +138,10 @@ impl Slide {
                             counters.resize(level + 1, 0);
                         }
 
-                        if level > previous_level {
-                            counters[level] = 0;
-                        } else if level < previous_level {
-                            counters.truncate(level + 1);
+                        match level.cmp(&previous_level) {
+                            std::cmp::Ordering::Greater => counters[level] = 0,
+                            std::cmp::Ordering::Less => counters.truncate(level + 1),
+                            std::cmp::Ordering::Equal => {}
                         }
 
                         counters[level] += 1;
@@ -226,6 +245,36 @@ impl Slide {
             None
         }
     }
+
+    pub fn load_images_manually(&self) -> Option<Vec<ManualImage>> {
+        let mut images: Vec<ManualImage> = Vec::new();
+
+        let image_refs: Vec<&ImageReference> = self.elements
+            .iter()
+            .filter_map(|element| match element {
+                SlideElement::Image(ref img) => Some(img),
+                _ => None,
+            })
+            .collect();
+
+        for image_ref in image_refs {
+            if let Some(image_data) = self.image_data.get(&image_ref.id) {
+                let image_data = self.config.compress_images
+                    .then( | | self.compress_image(image_data))
+                    .unwrap_or_else(|| Option::from(image_data.clone()));
+
+                let base64_str = general_purpose::STANDARD.encode(image_data?);
+
+                let image = ManualImage::new(
+                    base64_str,
+                    image_ref.clone(),
+                );
+                images.push(image);
+            }
+        }
+
+        Some(images)
+    }
 }
 
 #[cfg(test)]
@@ -296,7 +345,7 @@ mod tests {
 
         let raw_image = load_image_data("example-image.jpg");
 
-        if let Some(compression_result) = slide.compress_image(&*raw_image) {
+        if let Some(compression_result) = slide.compress_image(&raw_image) {
             assert!(compression_result.len() < raw_image.len());
         } else {
             panic!("Compression failed");
@@ -308,7 +357,7 @@ mod tests {
         let slide = mock_slide();
         let raw_image = load_image_data("example-image.jpg");
 
-        if let Some(compression_result) = slide.compress_image(&*raw_image) {
+        if let Some(compression_result) = slide.compress_image(&raw_image) {
             let result = image::load_from_memory(&compression_result);
             assert!(result.is_ok());
         } else {

diff --git a/src/types.rs b/src/types.rs
@@ -17,7 +17,7 @@ pub enum SlideElement {
     Unknown,
 }
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct ImageReference {
     pub id: String,
     pub target: String,