@@ -49,7 +49,7 @@ def create(
4949 * ,
5050 model : Literal ["kanon-2-enricher" ],
5151 texts : Union [SequenceNotStr [str ], str ],
52- overflow_strategy : Optional [Literal ["auto" , "drop_end" ]] | Omit = omit ,
52+ overflow_strategy : Optional [Literal ["auto" , "drop_end" , "chunk" ]] | Omit = omit ,
5353 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
5454 # The extra values given here take precedence over values defined on the client or passed to this method.
5555 extra_headers : Headers | None = None ,
@@ -71,9 +71,15 @@ def create(
7171
7272 overflow_strategy: The strategy for handling content exceeding the model's maximum input length.
7373
74- `auto` currently behaves the same as `drop_end`, dropping excess tokens from the
75- end of input. In the future, `auto` may implement more sophisticated strategies
76- such as chunking and context-aware stitching.
74+ `auto`, which is the default and recommended setting, currently behaves the same
75+ as `chunk`, which intelligently breaks the input up into smaller chunks and then
76+ stitches the results back together into a single prediction. In the future,
77+ `auto` may implement even more sophisticated strategies for handling long
78+ contexts, such as leveraging chunk overlap and/or a specialized stitching model.
79+
80+ `chunk` breaks the input up into smaller chunks that fit within the model's
81+ context window and then intelligently merges the results into a single
82+ prediction at the cost of a minor accuracy drop.
7783
7884 `drop_end` drops tokens from the end of input exceeding the model's maximum
7985 input length.
@@ -131,7 +137,7 @@ async def create(
131137 * ,
132138 model : Literal ["kanon-2-enricher" ],
133139 texts : Union [SequenceNotStr [str ], str ],
134- overflow_strategy : Optional [Literal ["auto" , "drop_end" ]] | Omit = omit ,
140+ overflow_strategy : Optional [Literal ["auto" , "drop_end" , "chunk" ]] | Omit = omit ,
135141 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
136142 # The extra values given here take precedence over values defined on the client or passed to this method.
137143 extra_headers : Headers | None = None ,
@@ -153,9 +159,15 @@ async def create(
153159
154160 overflow_strategy: The strategy for handling content exceeding the model's maximum input length.
155161
156- `auto` currently behaves the same as `drop_end`, dropping excess tokens from the
157- end of input. In the future, `auto` may implement more sophisticated strategies
158- such as chunking and context-aware stitching.
162+ `auto`, which is the default and recommended setting, currently behaves the same
163+ as `chunk`, which intelligently breaks the input up into smaller chunks and then
164+ stitches the results back together into a single prediction. In the future
165+ `auto` may implement even more sophisticated strategies for handling long
166+ contexts such as leveraging chunk overlap and/or a specialized stitching model.
167+
168+ `chunk` breaks the input up into smaller chunks that fit within the model's
169+ context window and then intelligently merges the results into a single
170+ prediction at the cost of a minor accuracy drop.
159171
160172 `drop_end` drops tokens from the end of input exceeding the model's maximum
161173 input length.
0 commit comments