chore(api): docs and response_format response property #778

Merged · 1 commit · Apr 17, 2024
38 changes: 36 additions & 2 deletions src/resources/beta/assistants.ts
@@ -142,13 +142,47 @@ export interface Assistant {
*/
tools: Array<AssistantTool>;

/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
* all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
*
* **Important:** when using JSON mode, you **must** also instruct the model to
* produce JSON yourself via a system or user message. Without this, the model may
* generate an unending stream of whitespace until the generation reaches the token
* limit, resulting in a long-running and seemingly "stuck" request. Also note that
* the message content may be partially cut off if `finish_reason="length"`, which
* indicates the generation exceeded `max_tokens` or the conversation exceeded the
* max context length.
*/
response_format?: ThreadsAPI.AssistantResponseFormatOption | null;

/**
* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
* make the output more random, while lower values like 0.2 will make it more
* focused and deterministic.
*/
temperature?: number | null;

/**
* A set of resources that are used by the assistant's tools. The resources are
* specific to the type of tool. For example, the `code_interpreter` tool requires
* a list of file IDs, while the `file_search` tool requires a list of vector store
* IDs.
*/
tool_resources?: Assistant.ToolResources | null;

/**
* An alternative to sampling with temperature, called nucleus sampling, where the
* model considers the results of the tokens with top_p probability mass. So 0.1
* means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number | null;
}
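
The new `Assistant` fields above mirror what can be set at creation time. A minimal sketch of putting them together (assuming the v4 `openai-node` client in an ESM context with top-level await; the model, name, instructions, and vector store ID are all illustrative):

```ts
import OpenAI from 'openai';

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

const assistant = await openai.beta.assistants.create({
  model: 'gpt-4-turbo',
  name: 'extractor', // illustrative
  // JSON mode: the instructions (or a user message) must themselves ask
  // for JSON, per the warning in the doc comment above.
  instructions: 'Extract the requested fields and reply only with a JSON object.',
  response_format: { type: 'json_object' },
  temperature: 0.2, // tune this or top_p, not both
  tools: [{ type: 'file_search' }],
  tool_resources: {
    // file_search takes vector store IDs; code_interpreter takes file IDs
    file_search: { vector_store_ids: ['vs_abc123'] }, // illustrative ID
  },
});
```
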

export namespace Assistant {
@@ -1012,7 +1046,7 @@ export interface AssistantCreateParams {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
@@ -1158,7 +1192,7 @@ export interface AssistantUpdateParams {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
84 changes: 66 additions & 18 deletions src/resources/beta/threads/runs/runs.ts
@@ -409,7 +409,7 @@ export interface Run {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
@@ -446,7 +446,7 @@ export interface Run {
* Controls which (if any) tool is called by the model. `none` means the model will
* not call any tools and instead generates a message. `auto` is the default value
* and means the model can pick between generating a message or calling a tool.
- * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ * Specifying a particular tool like `{"type": "file_search"}` or
* `{"type": "function", "function": {"name": "my_function"}}` forces the model to
* call that tool.
*/
@@ -459,6 +459,10 @@
*/
tools: Array<AssistantsAPI.AssistantTool>;

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
truncation_strategy: Run.TruncationStrategy | null;

/**
@@ -534,6 +538,10 @@ export namespace Run {
}
}

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
export interface TruncationStrategy {
/**
* The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -620,7 +628,7 @@ export interface RunCreateParamsBase {
* The maximum number of completion tokens that may be used over the course of the
* run. The run will make a best effort to use only the number of completion tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * completion tokens specified, the run will end with status `complete`. See
+ * completion tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_completion_tokens?: number | null;
@@ -629,7 +637,7 @@
* The maximum number of prompt tokens that may be used over the course of the run.
* The run will make a best effort to use only the number of prompt tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * prompt tokens specified, the run will end with status `complete`. See
+ * prompt tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_prompt_tokens?: number | null;
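
A sketch of how these budgets behave (values arbitrary; `openai` and `assistant` as in the earlier snippet): a run that exhausts either limit ends with status `incomplete` instead of erroring.

```ts
const thread = await openai.beta.threads.create();

const run = await openai.beta.threads.runs.create(thread.id, {
  assistant_id: assistant.id,
  max_prompt_tokens: 2_000, // best-effort cap across all turns of the run
  max_completion_tokens: 500,
});
// Once the run reaches a terminal state, an exhausted budget surfaces as
// run.status === 'incomplete', with the cause in run.incomplete_details.
```
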
@@ -673,7 +681,7 @@ export interface RunCreateParamsBase {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
@@ -706,7 +714,7 @@ export interface RunCreateParamsBase {
* Controls which (if any) tool is called by the model. `none` means the model will
* not call any tools and instead generates a message. `auto` is the default value
* and means the model can pick between generating a message or calling a tool.
- * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ * Specifying a particular tool like `{"type": "file_search"}` or
* `{"type": "function", "function": {"name": "my_function"}}` forces the model to
* call that tool.
*/
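
For example (a sketch reusing `thread` and `assistant` from the snippets above), forcing a particular tool instead of leaving the choice to the model:

```ts
const forcedRun = await openai.beta.threads.runs.create(thread.id, {
  assistant_id: assistant.id,
  // Force the file_search tool; for a function tool this would be
  // { type: 'function', function: { name: 'my_function' } } (name illustrative).
  tool_choice: { type: 'file_search' },
});
```
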
@@ -722,9 +730,15 @@
* An alternative to sampling with temperature, called nucleus sampling, where the
* model considers the results of the tokens with top_p probability mass. So 0.1
* means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number | null;

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
truncation_strategy?: RunCreateParams.TruncationStrategy | null;
}
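
A sketch of the truncation control (message count arbitrary), restricting the run's initial context to the most recent thread messages:

```ts
const truncatedRun = await openai.beta.threads.runs.create(thread.id, {
  assistant_id: assistant.id,
  // Seed the context window with only the last 4 thread messages;
  // { type: 'auto' } (the default) lets the API decide what to drop.
  truncation_strategy: { type: 'last_messages', last_messages: 4 },
});
```
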

@@ -770,6 +784,10 @@ export namespace RunCreateParams {
}
}

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
export interface TruncationStrategy {
/**
* The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -865,7 +883,7 @@ export interface RunCreateAndPollParams {
* The maximum number of completion tokens that may be used over the course of the
* run. The run will make a best effort to use only the number of completion tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * completion tokens specified, the run will end with status `complete`. See
+ * completion tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_completion_tokens?: number | null;
@@ -874,7 +892,7 @@
* The maximum number of prompt tokens that may be used over the course of the run.
* The run will make a best effort to use only the number of prompt tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * prompt tokens specified, the run will end with status `complete`. See
+ * prompt tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_prompt_tokens?: number | null;
@@ -918,7 +936,7 @@ export interface RunCreateAndPollParams {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
@@ -944,7 +962,7 @@
* Controls which (if any) tool is called by the model. `none` means the model will
* not call any tools and instead generates a message. `auto` is the default value
* and means the model can pick between generating a message or calling a tool.
- * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ * Specifying a particular tool like `{"type": "file_search"}` or
* `{"type": "function", "function": {"name": "my_function"}}` forces the model to
* call that tool.
*/
@@ -960,9 +978,15 @@
* An alternative to sampling with temperature, called nucleus sampling, where the
* model considers the results of the tokens with top_p probability mass. So 0.1
* means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number | null;

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
truncation_strategy?: RunCreateAndPollParams.TruncationStrategy | null;
}
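
`RunCreateAndPollParams` feeds the SDK's polling helper, which blocks until the run reaches a terminal state. A sketch, assuming the `createAndPoll` helper available in the SDK around this release:

```ts
const polled = await openai.beta.threads.runs.createAndPoll(thread.id, {
  assistant_id: assistant.id,
  max_completion_tokens: 500,
});

if (polled.status === 'incomplete') {
  // reason is e.g. 'max_completion_tokens' or 'max_prompt_tokens'
  console.warn('run truncated:', polled.incomplete_details?.reason);
}
```
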

@@ -1008,6 +1032,10 @@ export namespace RunCreateAndPollParams {
}
}

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
export interface TruncationStrategy {
/**
* The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -1056,7 +1084,7 @@ export interface RunCreateAndStreamParams {
* The maximum number of completion tokens that may be used over the course of the
* run. The run will make a best effort to use only the number of completion tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * completion tokens specified, the run will end with status `complete`. See
+ * completion tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_completion_tokens?: number | null;
@@ -1065,7 +1093,7 @@
* The maximum number of prompt tokens that may be used over the course of the run.
* The run will make a best effort to use only the number of prompt tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * prompt tokens specified, the run will end with status `complete`. See
+ * prompt tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_prompt_tokens?: number | null;
@@ -1109,7 +1137,7 @@ export interface RunCreateAndStreamParams {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
@@ -1135,7 +1163,7 @@
* Controls which (if any) tool is called by the model. `none` means the model will
* not call any tools and instead generates a message. `auto` is the default value
* and means the model can pick between generating a message or calling a tool.
- * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ * Specifying a particular tool like `{"type": "file_search"}` or
* `{"type": "function", "function": {"name": "my_function"}}` forces the model to
* call that tool.
*/
@@ -1151,9 +1179,15 @@
* An alternative to sampling with temperature, called nucleus sampling, where the
* model considers the results of the tokens with top_p probability mass. So 0.1
* means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number | null;

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
truncation_strategy?: RunCreateAndStreamParams.TruncationStrategy | null;
}
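
These params back the streaming helper; a sketch of consuming a streamed run, assuming the `AssistantStream` event API the SDK shipped in this era:

```ts
const stream = openai.beta.threads.runs.stream(thread.id, {
  assistant_id: assistant.id,
});

stream
  .on('textDelta', (delta) => process.stdout.write(delta.value ?? ''))
  .on('end', () => process.stdout.write('\n'));

const finalRun = await stream.finalRun(); // resolves when the run finishes
```
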

@@ -1199,6 +1233,10 @@ export namespace RunCreateAndStreamParams {
}
}

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
export interface TruncationStrategy {
/**
* The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -1247,7 +1285,7 @@ export interface RunStreamParams {
* The maximum number of completion tokens that may be used over the course of the
* run. The run will make a best effort to use only the number of completion tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * completion tokens specified, the run will end with status `complete`. See
+ * completion tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_completion_tokens?: number | null;
@@ -1256,7 +1294,7 @@
* The maximum number of prompt tokens that may be used over the course of the run.
* The run will make a best effort to use only the number of prompt tokens
* specified, across multiple turns of the run. If the run exceeds the number of
- * prompt tokens specified, the run will end with status `complete`. See
+ * prompt tokens specified, the run will end with status `incomplete`. See
* `incomplete_details` for more info.
*/
max_prompt_tokens?: number | null;
@@ -1300,7 +1338,7 @@ export interface RunStreamParams {
/**
* Specifies the format that the model must output. Compatible with
* [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
- * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+ * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
*
* Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
* message the model generates is valid JSON.
@@ -1326,7 +1364,7 @@
* Controls which (if any) tool is called by the model. `none` means the model will
* not call any tools and instead generates a message. `auto` is the default value
* and means the model can pick between generating a message or calling a tool.
- * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+ * Specifying a particular tool like `{"type": "file_search"}` or
* `{"type": "function", "function": {"name": "my_function"}}` forces the model to
* call that tool.
*/
@@ -1342,9 +1380,15 @@
* An alternative to sampling with temperature, called nucleus sampling, where the
* model considers the results of the tokens with top_p probability mass. So 0.1
* means only the tokens comprising the top 10% probability mass are considered.
*
* We generally recommend altering this or temperature but not both.
*/
top_p?: number | null;

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
truncation_strategy?: RunStreamParams.TruncationStrategy | null;
}

@@ -1390,6 +1434,10 @@ export namespace RunStreamParams {
}
}

/**
* Controls for how a thread will be truncated prior to the run. Use this to
 * control the initial context window of the run.
*/
export interface TruncationStrategy {
/**
* The truncation strategy to use for the thread. The default is `auto`. If set to