diff --git a/docs/reference/api-reference.md b/docs/reference/api-reference.md index baa9031c9..055906a5a 100644 --- a/docs/reference/api-reference.md +++ b/docs/reference/api-reference.md @@ -7878,7 +7878,7 @@ client.inference.putCustom({ task_type, custom_inference_id, service, service_se - **`task_type` (Enum("text_embedding" \| "sparse_embedding" \| "rerank" \| "completion"))**: The type of the inference task that the model will perform. - **`custom_inference_id` (string)**: The unique identifier of the inference endpoint. - **`service` (Enum("custom"))**: The type of service supported for the specified task type. In this case, `custom`. -- **`service_settings` ({ headers, input_type, query_parameters, request, response, secret_parameters, url })**: Settings used to install the inference model. +- **`service_settings` ({ batch_size, headers, input_type, query_parameters, request, response, secret_parameters, url })**: Settings used to install the inference model. These settings are specific to the `custom` service. - **`chunking_settings` (Optional, { max_chunk_size, overlap, sentence_overlap, separator_group, separators, strategy })**: The chunking configuration object. - **`task_settings` (Optional, { parameters })**: Settings to configure the inference task. diff --git a/src/api/types.ts b/src/api/types.ts index 4caf5ee8d..cea5eb911 100644 --- a/src/api/types.ts +++ b/src/api/types.ts @@ -7420,7 +7420,7 @@ export interface AnalysisKeywordTokenizer extends AnalysisTokenizerBase { export interface AnalysisKuromojiAnalyzer { type: 'kuromoji' - mode: AnalysisKuromojiTokenizationMode + mode?: AnalysisKuromojiTokenizationMode user_dictionary?: string } @@ -22979,7 +22979,11 @@ export interface InferenceCustomResponseParams { } export interface InferenceCustomServiceSettings { - /** Specifies the HTTPS header parameters – such as `Authentication` or `Contet-Type` – that are required to access the custom service. 
+ /** Specifies the batch size used for the semantic_text field. If the field is not provided, the default is 10. + * The batch size is the maximum number of inputs in a single request to the upstream service. + * The chunks within the batch are controlled by the selected chunking strategy for the semantic_text field. */ + batch_size?: integer + /** Specifies the HTTP header parameters – such as `Authentication` or `Content-Type` – that are required to access the custom service. * For example: * ``` * "headers":{