diff --git a/api/api_tests/internal/schemas/transform/test_image_caption_schema.py b/api/api_tests/internal/schemas/transform/test_image_caption_schema.py
index 12f24c358..6dbfd3784 100644
--- a/api/api_tests/internal/schemas/transform/test_image_caption_schema.py
+++ b/api/api_tests/internal/schemas/transform/test_image_caption_schema.py
@@ -14,6 +14,8 @@ def test_image_caption_extraction_schema_defaults():
     assert schema.endpoint_url.startswith("https://")
     assert schema.prompt.startswith("Caption")
     assert schema.model_name.startswith("nvidia/")
+    assert schema.context_text_max_chars == 0
+    assert schema.temperature == 1.0
     assert schema.raise_on_failure is False
 
 
@@ -40,6 +42,26 @@ def test_image_caption_extraction_schema_accepts_truthy_values():
     assert schema.raise_on_failure is False
 
 
+def test_image_caption_extraction_schema_context_text_max_chars_custom():
+    requested_chars = 512  # an explicit value must be stored as given
+    assert ImageCaptionExtractionSchema(context_text_max_chars=requested_chars).context_text_max_chars == 512
+
+
+def test_image_caption_extraction_schema_context_text_max_chars_none_coerced():
+    coerced = ImageCaptionExtractionSchema(context_text_max_chars=None)  # None -> field default
+    assert coerced.context_text_max_chars == 0
+
+
+def test_image_caption_extraction_schema_temperature_custom():
+    custom_temperature = 0.5  # an explicit value must be stored as given
+    assert ImageCaptionExtractionSchema(temperature=custom_temperature).temperature == 0.5
+
+
+def test_image_caption_extraction_schema_temperature_none_coerced():
+    coerced = ImageCaptionExtractionSchema(temperature=None)  # None -> field default
+    assert coerced.temperature == 1.0
+
+
 def test_image_caption_extraction_schema_rejects_extra_fields():
     with pytest.raises(ValidationError) as excinfo:
         ImageCaptionExtractionSchema(extra_field="oops")
diff --git a/api/api_tests/internal/transform/test_caption_image.py b/api/api_tests/internal/transform/test_caption_image.py
index 49e014405..a3d8327f7 100644
--- a/api/api_tests/internal/transform/test_caption_image.py
+++ b/api/api_tests/internal/transform/test_caption_image.py
@@ -72,6 +72,7 @@ def test_transform_image_create_vlm_caption_internal_happy_path(
         dummy_task_config["api_key"],
         dummy_task_config["endpoint_url"],
         dummy_task_config["model_name"],
+        temperature=1.0,
     )
 
     # Assert captions updated correctly in the DataFrame
@@ -125,6 +126,7 @@ def test_transform_image_create_vlm_caption_internal_uses_fallback_config(
         dummy_transform_config.api_key,
         dummy_transform_config.endpoint_url,
         dummy_transform_config.model_name,
+        temperature=1.0,
     )
 
     # Assert captions updated correctly
@@ -236,7 +238,7 @@ def test_generate_captions_happy_path(mock_scale, mock_create_client):
 
     # Assert infer called with correct data
     expected_payload = {"base64_images": ["scaled_b64img1", "scaled_b64img2"], "prompt": "describe this"}
-    mock_client.infer.assert_called_once_with(expected_payload, model_name="test_model")
+    mock_client.infer.assert_called_once_with(expected_payload, model_name="test_model", temperature=1.0)
 
     # Result matches mock captions
     assert result == ["Caption 1", "Caption 2"]
@@ -280,6 +282,216 @@ def test_generate_captions_empty_images_returns_empty_list(mock_scale, mock_crea
         model_name="test_model",
     )
 
-    mock_client.infer.assert_called_once_with({"base64_images": [], "prompt": "describe this"}, model_name="test_model")
+    mock_client.infer.assert_called_once_with(
+        {"base64_images": [], "prompt": "describe this"}, model_name="test_model", temperature=1.0
+    )
 
     assert result == []
+
+
+# --- _gather_context_text_for_image tests ---
+
+
+def test_gather_context_text_page_match():
+    """All text entries for the image's page come back joined by single spaces."""
+    texts_by_page = {3: ["page three text", "more text"]}
+    metadata = {"content_metadata": {"type": "image", "page_number": 3}}
+
+    # A 200-character budget is far larger than the joined text, so no
+    # truncation is expected here.
+    context = module_under_test._gather_context_text_for_image(metadata, texts_by_page, 200)
+
+    expected = "page three text more text"
+    assert context == expected
+
+
+def test_gather_context_text_truncation():
+    """Text is truncated to max_chars, keeping the leading characters."""
+    image_meta = {
+        "content_metadata": {
+            "type": "image",
+            "page_number": 0,
+        },
+    }
+    page_text_map = {0: ["abcdefghij" + "z" * 490]}
+    result = module_under_test._gather_context_text_for_image(image_meta, page_text_map, 10)
+    assert result == "abcdefghij"
+
+
+def test_gather_context_text_safety_cap():
+    """A max_chars above _MAX_CONTEXT_TEXT_CHARS still yields at most the cap."""
+    oversized_request = 99999
+    page_text_map = {0: ["x" * 10000]}
+    image_meta = {
+        "content_metadata": {
+            "type": "image",
+            "page_number": 0,
+        },
+    }
+    result = module_under_test._gather_context_text_for_image(image_meta, page_text_map, oversized_request)
+    assert len(result) == module_under_test._MAX_CONTEXT_TEXT_CHARS
+
+
+def test_gather_context_text_no_text():
+    """An empty page-text map produces an empty context string."""
+    empty_map = {}
+    image_meta = {
+        "content_metadata": {
+            "type": "image",
+            "page_number": 5,
+        },
+    }
+    assert module_under_test._gather_context_text_for_image(image_meta, empty_map, 200) == ""
+
+
+def test_gather_context_text_wrong_page():
+    """Text stored under a different page number is not returned."""
+    page_text_map = {0: ["some text"]}
+    image_meta = {
+        "content_metadata": {
+            "type": "image",
+            "page_number": 99,
+        },
+    }
+    context = module_under_test._gather_context_text_for_image(image_meta, page_text_map, 200)
+    assert context == ""
+
+
+# --- _build_prompt_with_context tests ---
+
+
+def test_build_prompt_with_context():
+    expected = "Text near this image:\n---\nnearby text\n---\n\nCaption this:"
+    assert module_under_test._build_prompt_with_context("Caption this:", "nearby text") == expected
+
+
+def test_build_prompt_with_empty_context():
+    base_prompt = "Caption this:"  # must come back untouched when there is no context
+    assert module_under_test._build_prompt_with_context(base_prompt, "") == "Caption this:"
+
+
+# --- _build_page_text_map tests ---
+
+
+def test_build_page_text_map():
+    """
+    Text rows are grouped by page number.
+
+    The fixture mixes two text rows on page 0, one image row on page 0 and
+    one text row on page 1.
+    """
+    # (content, type, page_number) for each row, in DataFrame order.
+    row_specs = [
+        ("text on page 0", "text", 0),
+        ("more on page 0", "text", 0),
+        ("image content", "image", 0),
+        ("page 1 text", "text", 1),
+    ]
+    df = pd.DataFrame(
+        [
+            {
+                "metadata": {
+                    "content": content,
+                    "content_metadata": {"type": row_type, "page_number": page},
+                }
+            }
+            for content, row_type, page in row_specs
+        ]
+    )
+
+    page_text_map = module_under_test._build_page_text_map(df)
+
+    # Only "text" rows contribute; the image row on page 0 is left out, and
+    # per-page ordering follows row order.
+    assert page_text_map == {0: ["text on page 0", "more on page 0"], 1: ["page 1 text"]}
+
+
+# --- Context-enabled integration tests ---
+
+
+@patch(f"{MODULE_UNDER_TEST}._generate_captions")
+def test_transform_context_enabled_per_image_calls(mock_generate, dummy_transform_config):
+    """With context enabled, each image gets its own VLM call with enriched prompt."""
+    mock_generate.return_value = ["caption_with_context"]
+
+    # One image row and one text row, both on page 0.
+    image_row = {
+        "metadata": {
+            "content": "b64_img1",
+            "content_metadata": {"type": "image", "page_number": 0},
+            "image_metadata": {},
+        }
+    }
+    text_row = {
+        "metadata": {
+            "content": "page zero text",
+            "content_metadata": {"type": "text", "page_number": 0},
+            "image_metadata": {},
+        }
+    }
+    df = pd.DataFrame([image_row, text_row])
+
+    # A positive context_text_max_chars switches the transform to per-image calls.
+    task_config = {
+        "api_key": "key",
+        "prompt": "Caption this:",
+        "system_prompt": "sys",
+        "endpoint_url": "https://url",
+        "model_name": "model",
+        "context_text_max_chars": 500,
+    }
+
+    result = transform_image_create_vlm_caption_internal(df.copy(), task_config, dummy_transform_config)
+
+    # Exactly one call: the frame holds a single image row.
+    assert mock_generate.call_count == 1
+
+    images_arg, prompt_arg = mock_generate.call_args[0][:2]
+    # The image is sent on its own rather than in a batch.
+    assert images_arg == ["b64_img1"]
+    # The prompt carries the context header, the page text and the base prompt.
+    for fragment in ("Text near this image:", "page zero text", "Caption this:"):
+        assert fragment in prompt_arg
+    # The generated caption lands on the image row.
+    assert result.iloc[0]["metadata"]["image_metadata"]["caption"] == "caption_with_context"
+
+
+@patch(f"{MODULE_UNDER_TEST}._generate_captions")
+def test_transform_temperature_forwarded(mock_generate, dummy_df_with_images, dummy_transform_config):
+    """Temperature from task_config is forwarded to _generate_captions."""
+    requested_temperature = 0.7
+    mock_generate.return_value = ["c1", "c2"]
+    task_config = {
+        "api_key": "key",
+        "prompt": "Describe",
+        "system_prompt": "sys",
+        "endpoint_url": "https://url",
+        "model_name": "model",
+        "temperature": requested_temperature,
+    }
+
+    transform_image_create_vlm_caption_internal(dummy_df_with_images.copy(), task_config, dummy_transform_config)
+
+    # Temperature travels as a keyword argument on the single batch call.
+    mock_generate.assert_called_once()
+    assert mock_generate.call_args.kwargs["temperature"] == 0.7
+
+
+@patch(f"{MODULE_UNDER_TEST}._generate_captions")
+def test_transform_context_disabled_batch_preserved(
+    mock_generate, dummy_df_with_images, dummy_task_config, dummy_transform_config
+):
+    """With context disabled (default), batch behavior is unchanged."""
+    mock_generate.return_value = ["c1", "c2"]
+    frame = dummy_df_with_images.copy()
+
+    transform_image_create_vlm_caption_internal(frame, dummy_task_config, dummy_transform_config)
+
+    # Batch mode: a single call that receives every image at once.
+    mock_generate.assert_called_once()
+    positional = mock_generate.call_args[0]
+    batch_images = positional[0]
+    batch_prompt = positional[1]
+    assert batch_images == ["base64_image_1", "base64_image_2"]
+    # No context header may be added to the prompt.
+    assert "Text near this image:" not in batch_prompt
diff --git a/api/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py b/api/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py
index 84d78551f..458b58a77 100644
--- a/api/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py
+++ b/api/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py
@@ -113,6 +113,8 @@ class IngestTaskCaptionSchema(BaseModelNoExt):
     prompt: Optional[str] = None
     system_prompt: Optional[str] = None
     model_name: Optional[str] = None
+    context_text_max_chars: Optional[int] = None
+    temperature: Optional[float] = None
 
 
 class IngestTaskFilterParamsSchema(BaseModelNoExt):
diff --git a/api/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py b/api/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py
index 9bf68fe37..7717f9695 100644
--- a/api/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py
+++ b/api/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py
@@ -12,6 +12,8 @@ class ImageCaptionExtractionSchema(BaseModel):
     prompt: str = "Caption the content of this image:"
     system_prompt: str = "/no_think"
     model_name: str = "nvidia/nemotron-nano-12b-v2-vl"
+    context_text_max_chars: int = 0
+    temperature: float = 1.0
     raise_on_failure: bool = False
     model_config = ConfigDict(extra="forbid")
 
@@ -33,4 +35,8 @@ def _coerce_none_to_defaults(cls, values):
             values["prompt"] = cls.model_fields["prompt"].default
         if values.get("system_prompt") is None:
             values["system_prompt"] = cls.model_fields["system_prompt"].default
+        if values.get("context_text_max_chars") is None:
+            values["context_text_max_chars"] = cls.model_fields["context_text_max_chars"].default
+        if values.get("temperature") is None:
+            values["temperature"] = cls.model_fields["temperature"].default
         return values
diff --git a/api/src/nv_ingest_api/internal/transform/caption_image.py b/api/src/nv_ingest_api/internal/transform/caption_image.py
index 936896ce8..5cfe4add7 100644
--- a/api/src/nv_ingest_api/internal/transform/caption_image.py
+++ b/api/src/nv_ingest_api/internal/transform/caption_image.py
@@ -16,6 +16,75 @@
 
 logger = logging.getLogger(__name__)
 
+_MAX_CONTEXT_TEXT_CHARS = 4096  # hard upper bound on context characters, whatever max_chars is requested
+
+
+def _gather_context_text_for_image(
+    image_meta: Dict[str, Any],
+    page_text_map: Dict[int, List[str]],
+    max_chars: int,
+) -> str:
+    """
+    Collect the text found on the same page as an image, for use as VLM prompt context.
+
+    Parameters
+    ----------
+    image_meta : dict
+        The full metadata dict for the image row; its page is read from
+        ``content_metadata.page_number`` (``-1`` when absent).
+    page_text_map : dict
+        Mapping of page number -> list of text strings, precomputed from the
+        DataFrame's text rows.
+    max_chars : int
+        Maximum number of characters to return. Clamped to the range
+        ``[0, _MAX_CONTEXT_TEXT_CHARS]``, so non-positive values yield ``""``.
+
+    Returns
+    -------
+    str
+        The page's text entries joined by single spaces (possibly truncated),
+        or an empty string if the page has no text.
+    """
+    # Clamp at 0 as well: a negative bound would otherwise slice from the end.
+    effective_max = max(0, min(max_chars, _MAX_CONTEXT_TEXT_CHARS))
+    # ``or {}`` also covers a ``content_metadata`` key explicitly set to None.
+    content_meta = image_meta.get("content_metadata") or {}
+    page_texts = page_text_map.get(content_meta.get("page_number", -1), [])
+    # An empty list joins to "", which is the documented "nothing found" result.
+    return " ".join(page_texts)[:effective_max]
+
+
+def _build_prompt_with_context(base_prompt: str, context_text: str) -> str:
+    """
+    Return *base_prompt* preceded by a delimited block holding *context_text*.
+
+    An empty (falsy) *context_text* leaves *base_prompt* untouched.
+    """
+    if context_text:
+        return f"Text near this image:\n---\n{context_text}\n---\n\n{base_prompt}"
+    return base_prompt
+
+
+def _build_page_text_map(df: pd.DataFrame) -> Dict[int, List[str]]:
+    """
+    Group the content of text rows by page number.
+
+    Built in a single pass so that per-image lookups do not rescan every row.
+    Rows with no metadata, a non-"text" type, or empty content are ignored;
+    rows lacking a page number are filed under ``-1``.
+    """
+    texts_by_page: Dict[int, List[str]] = {}
+    for _, record in df.iterrows():
+        metadata = record.get("metadata")
+        if metadata is None:
+            continue
+        content_meta = metadata.get("content_metadata", {})
+        text = metadata.get("content", "") if content_meta.get("type") == "text" else ""
+        if text:
+            key = content_meta.get("page_number", -1)
+            texts_by_page.setdefault(key, []).append(text)
+    return texts_by_page
+
 
 def _prepare_dataframes_mod(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series]:
     """
@@ -62,6 +131,7 @@ def _generate_captions(
     api_key: str,
     endpoint_url: str,
     model_name: str,
+    temperature: float = 1.0,
 ) -> List[str]:
     """
     Generates captions for a list of base64-encoded PNG images using the VLM model API.
@@ -116,7 +186,7 @@ def _generate_captions(
         )
 
         # Perform inference to generate captions.
-        captions: List[str] = nim_client.infer(data, model_name=model_name)
+        captions: List[str] = nim_client.infer(data, model_name=model_name, temperature=temperature)
         return captions
 
     except Exception as e:
@@ -182,6 +252,14 @@ def transform_image_create_vlm_caption_internal(
     endpoint_url: str = task_config.get("endpoint_url") or transform_config.endpoint_url
     model_name: str = task_config.get("model_name") or transform_config.model_name
 
+    # Task config overrides pipeline default; test `is None` so an explicit 0 / 0.0 is not discarded.
+    context_text_max_chars = task_config.get("context_text_max_chars")
+    if context_text_max_chars is None:
+        context_text_max_chars = getattr(transform_config, "context_text_max_chars", 0)
+    temperature = task_config.get("temperature")
+    if temperature is None:
+        temperature = getattr(transform_config, "temperature", 1.0)
+
     # Create a mask for rows where the content type is "image".
     df_mask: pd.Series = df_transform_ledger["metadata"].apply(
         lambda meta: meta.get("content_metadata", {}).get("type") == "image"
@@ -191,26 +269,50 @@ def transform_image_create_vlm_caption_internal(
     if not df_mask.any():
         return df_transform_ledger
 
-    # Collect base64-encoded images from the rows where the content type is "image".
-    base64_images: List[str] = df_transform_ledger.loc[df_mask, "metadata"].apply(lambda meta: meta["content"]).tolist()
-
-    # Generate captions for the collected images.
-    captions: List[str] = _generate_captions(
-        base64_images,
-        prompt,
-        system_prompt,
-        api_key,
-        endpoint_url,
-        model_name,
-    )
+    if context_text_max_chars and context_text_max_chars > 0:
+        page_text_map = _build_page_text_map(df_transform_ledger)
+
+        for idx in df_transform_ledger.loc[df_mask].index:
+            meta: Dict[str, Any] = df_transform_ledger.at[idx, "metadata"]
+            base64_image: str = meta["content"]
+            context_text = _gather_context_text_for_image(meta, page_text_map, context_text_max_chars)
+            enriched_prompt = _build_prompt_with_context(prompt, context_text)
+
+            captions: List[str] = _generate_captions(
+                [base64_image],
+                enriched_prompt,
+                system_prompt,
+                api_key,
+                endpoint_url,
+                model_name,
+                temperature=temperature,
+            )
+
+            image_meta: Dict[str, Any] = meta.get("image_metadata", {})
+            image_meta["caption"] = captions[0] if captions else ""
+            meta["image_metadata"] = image_meta
+            df_transform_ledger.at[idx, "metadata"] = meta
+    else:
+        base64_images: List[str] = (
+            df_transform_ledger.loc[df_mask, "metadata"].apply(lambda meta: meta["content"]).tolist()
+        )
+
+        captions: List[str] = _generate_captions(
+            base64_images,
+            prompt,
+            system_prompt,
+            api_key,
+            endpoint_url,
+            model_name,
+            temperature=temperature,
+        )
 
-    # Update the DataFrame: assign each generated caption to the corresponding row.
-    for idx, caption in zip(df_transform_ledger.loc[df_mask].index, captions):
-        meta: Dict[str, Any] = df_transform_ledger.at[idx, "metadata"]
-        image_meta: Dict[str, Any] = meta.get("image_metadata", {})
-        image_meta["caption"] = caption
-        meta["image_metadata"] = image_meta
-        df_transform_ledger.at[idx, "metadata"] = meta
+        for idx, caption in zip(df_transform_ledger.loc[df_mask].index, captions):
+            meta: Dict[str, Any] = df_transform_ledger.at[idx, "metadata"]
+            image_meta: Dict[str, Any] = meta.get("image_metadata", {})
+            image_meta["caption"] = caption
+            meta["image_metadata"] = image_meta
+            df_transform_ledger.at[idx, "metadata"] = meta
 
     logger.debug("Image content captioning complete")
     result, execution_trace_log = df_transform_ledger, {}
diff --git a/client/client_tests/primitives/tasks/test_caption.py b/client/client_tests/primitives/tasks/test_caption.py
index dd20be95c..34a47d31b 100644
--- a/client/client_tests/primitives/tasks/test_caption.py
+++ b/client/client_tests/primitives/tasks/test_caption.py
@@ -113,6 +113,110 @@ def test_caption_task_to_dict_empty_fields():
     assert task_dict == {"type": "caption", "task_properties": {}}
 
 
+def test_caption_task_temperature_init():
+    """A temperature passed to the constructor is stored on the task."""
+    caption_task = CaptionTask(temperature=0.7)
+    assert caption_task._temperature == 0.7
+
+
+def test_caption_task_temperature_default():
+    """Without an explicit temperature the stored value is None."""
+    caption_task = CaptionTask()
+    assert caption_task._temperature is None
+
+
+def test_caption_task_temperature_to_dict():
+    """to_dict() emits the temperature under task_properties when it is set."""
+    caption_task = CaptionTask(temperature=0.5)
+    properties = caption_task.to_dict()["task_properties"]
+    assert properties["temperature"] == 0.5
+
+
+def test_caption_task_temperature_to_dict_unset():
+    """to_dict() leaves the temperature key out when none was given."""
+    caption_task = CaptionTask()
+    properties = caption_task.to_dict()["task_properties"]
+    assert "temperature" not in properties
+
+
+def test_caption_task_temperature_str():
+    """str() of the task reports the temperature when it is set."""
+    caption_task = CaptionTask(temperature=0.3)
+    rendered = str(caption_task)
+    assert "temperature: 0.3" in rendered
+
+
+def test_caption_task_temperature_str_unset():
+    """str() of the task does not mention temperature when it is unset."""
+    caption_task = CaptionTask()
+    rendered = str(caption_task)
+    assert "temperature" not in rendered
+
+
+def test_caption_task_context_text_max_chars_init():
+    """A context_text_max_chars passed to the constructor is stored on the task."""
+    caption_task = CaptionTask(context_text_max_chars=512)
+    assert caption_task._context_text_max_chars == 512
+
+
+def test_caption_task_context_text_max_chars_default():
+    """Without an explicit context_text_max_chars the stored value is None."""
+    caption_task = CaptionTask()
+    assert caption_task._context_text_max_chars is None
+
+
+def test_caption_task_context_text_max_chars_to_dict():
+    """to_dict() emits context_text_max_chars under task_properties when it is set."""
+    caption_task = CaptionTask(context_text_max_chars=256)
+    properties = caption_task.to_dict()["task_properties"]
+    assert properties["context_text_max_chars"] == 256
+
+
+def test_caption_task_context_text_max_chars_to_dict_unset():
+    """to_dict() leaves the context_text_max_chars key out when none was given."""
+    caption_task = CaptionTask()
+    properties = caption_task.to_dict()["task_properties"]
+    assert "context_text_max_chars" not in properties
+
+
+def test_caption_task_context_text_max_chars_str():
+    """str() of the task reports context_text_max_chars when it is set."""
+    caption_task = CaptionTask(context_text_max_chars=1024)
+    rendered = str(caption_task)
+    assert "context_text_max_chars: 1024" in rendered
+
+
+def test_caption_task_context_text_max_chars_str_unset():
+    """str() of the task does not mention context_text_max_chars when it is unset."""
+    caption_task = CaptionTask()
+    rendered = str(caption_task)
+    assert "context_text_max_chars" not in rendered
+
+
+def test_caption_task_schema_context_text_max_chars():
+    """IngestTaskCaptionSchema keeps an explicit context_text_max_chars."""
+    validated = IngestTaskCaptionSchema(context_text_max_chars=100)
+    assert validated.context_text_max_chars == 100
+
+
+def test_caption_task_schema_context_text_max_chars_default():
+    """IngestTaskCaptionSchema leaves context_text_max_chars as None when omitted."""
+    validated = IngestTaskCaptionSchema()
+    assert validated.context_text_max_chars is None
+
+
+def test_caption_task_schema_temperature():
+    """IngestTaskCaptionSchema keeps an explicit temperature."""
+    validated = IngestTaskCaptionSchema(temperature=0.5)
+    assert validated.temperature == 0.5
+
+
+def test_caption_task_schema_temperature_default():
+    """IngestTaskCaptionSchema leaves temperature as None when omitted."""
+    validated = IngestTaskCaptionSchema()
+    assert validated.temperature is None
+
+
 # Execute tests
 if __name__ == "__main__":
     test_caption_task_schema_valid_all_fields()
@@ -127,4 +231,21 @@ def test_caption_task_to_dict_empty_fields():
     test_caption_task_to_dict_all_fields()
     test_caption_task_to_dict_partial_fields()
     test_caption_task_to_dict_empty_fields()
+    # Tests for the temperature and context_text_max_chars options (task and schema level).
+    test_caption_task_temperature_init()
+    test_caption_task_temperature_default()
+    test_caption_task_temperature_to_dict()
+    test_caption_task_temperature_to_dict_unset()
+    test_caption_task_temperature_str()
+    test_caption_task_temperature_str_unset()
+    test_caption_task_context_text_max_chars_init()
+    test_caption_task_context_text_max_chars_default()
+    test_caption_task_context_text_max_chars_to_dict()
+    test_caption_task_context_text_max_chars_to_dict_unset()
+    test_caption_task_context_text_max_chars_str()
+    test_caption_task_context_text_max_chars_str_unset()
+    test_caption_task_schema_context_text_max_chars()
+    test_caption_task_schema_context_text_max_chars_default()
+    test_caption_task_schema_temperature()
+    test_caption_task_schema_temperature_default()
     print("All tests passed.")
diff --git a/client/src/nv_ingest_client/client/interface.py b/client/src/nv_ingest_client/client/interface.py
index de423d295..c991eb8b5 100644
--- a/client/src/nv_ingest_client/client/interface.py
+++ b/client/src/nv_ingest_client/client/interface.py
@@ -1524,6 +1524,8 @@ def caption(self, **kwargs: Any) -> "Ingestor":
             "prompt": task_options.prompt,
             "system_prompt": task_options.system_prompt,
             "model_name": task_options.model_name,
+            "context_text_max_chars": task_options.context_text_max_chars,
+            "temperature": task_options.temperature,
         }
         caption_task = CaptionTask(**caption_params)
         self._job_specs.add_task(caption_task)
diff --git a/client/src/nv_ingest_client/primitives/tasks/caption.py b/client/src/nv_ingest_client/primitives/tasks/caption.py
index 93c08a2b4..5d10a5056 100644
--- a/client/src/nv_ingest_client/primitives/tasks/caption.py
+++ b/client/src/nv_ingest_client/primitives/tasks/caption.py
@@ -24,6 +24,8 @@ def __init__(
         prompt: str = None,
         system_prompt: str = None,
         model_name: str = None,
+        context_text_max_chars: int = None,
+        temperature: float = None,
     ) -> None:
         super().__init__()
 
@@ -34,6 +36,8 @@ def __init__(
             prompt=prompt,
             system_prompt=system_prompt,
             model_name=model_name,
+            context_text_max_chars=context_text_max_chars,
+            temperature=temperature,
         )
 
         self._api_key = validated_data.api_key
@@ -41,6 +45,8 @@ def __init__(
         self._prompt = validated_data.prompt
         self._system_prompt = validated_data.system_prompt
         self._model_name = validated_data.model_name
+        self._context_text_max_chars = validated_data.context_text_max_chars
+        self._temperature = validated_data.temperature
 
     def __str__(self) -> str:
         """
@@ -59,6 +65,10 @@ def __str__(self) -> str:
             info += f"  system_prompt: {self._system_prompt}\n"
         if self._model_name:
             info += f"  model_name: {self._model_name}\n"
+        if self._context_text_max_chars is not None:  # 0 is a real setting; only None means unset
+            info += f"  context_text_max_chars: {self._context_text_max_chars}\n"
+        if self._temperature is not None:
+            info += f"  temperature: {self._temperature}\n"
 
         return info
 
@@ -83,4 +93,10 @@ def to_dict(self) -> Dict:
         if self._model_name:
             task_properties["model_name"] = self._model_name
 
+        if self._context_text_max_chars is not None:  # keep an explicit 0 instead of dropping it
+            task_properties["context_text_max_chars"] = self._context_text_max_chars
+
+        if self._temperature is not None:
+            task_properties["temperature"] = self._temperature
+
         return {"type": "caption", "task_properties": task_properties}
diff --git a/docs/docs/extraction/vlm-embed.md b/docs/docs/extraction/vlm-embed.md
index ed36a7fcd..123c9bf33 100644
--- a/docs/docs/extraction/vlm-embed.md
+++ b/docs/docs/extraction/vlm-embed.md
@@ -217,4 +217,4 @@ results = ingestor.ingest()
 - [Support Matrix](support-matrix.md)
 - [Troubleshoot Nemo Retriever Extraction](troubleshoot.md)
 - [Use the NV-Ingest Python API](nv-ingest-python-api.md)
-- [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images)
\ No newline at end of file
+- [Extract Captions from Images](nv-ingest-python-api.md#extract-captions-from-images)