diff --git a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc index 2625c11f2..241213c44 100644 --- a/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc +++ b/docs/en/stack/ml/nlp/ml-nlp-elser.asciidoc @@ -124,7 +124,11 @@ PUT _inference/sparse_embedding/my-elser-model { "service": "elasticsearch", "service_settings": { - "num_allocations": 1, + "adaptive_allocations": { + "enabled": true, + "min_number_of_allocations": 1, + "max_number_of_allocations": 10 + }, "num_threads": 1, "model_id": ".elser_model_2_linux-x86_64" } @@ -132,6 +136,7 @@ PUT _inference/sparse_embedding/my-elser-model ---------------------------------- -- The API request automatically initiates the model download and then deploy the model. +This example uses <<ml-nlp-auto-scale,autoscaling>> through adaptive allocation. Refer to the {ref}/infer-service-elser.html[ELSER {infer} service documentation] to learn more about the available settings.