-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathOpenAiModel.ts
More file actions
152 lines (150 loc) · 7.1 KB
/
OpenAiModel.ts
File metadata and controls
152 lines (150 loc) · 7.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// This file was auto-generated by Fern from our API Definition.
import type * as Vapi from "../index.js";
/**
 * Configuration for an assistant's OpenAI language model: provider/model
 * selection, tools and knowledge base, prompt-caching options, and
 * sampling parameters forwarded to the OpenAI chat-completions API.
 */
export interface OpenAiModel {
    /** This is the starting state for the conversation. */
    messages?: Vapi.OpenAiMessage[];
    /**
     * These are the tools that the assistant can use during the call. To use existing tools, use `toolIds`.
     *
     * Both `tools` and `toolIds` can be used together.
     */
    tools?: Vapi.OpenAiModelToolsItem[];
    /**
     * These are the tools that the assistant can use during the call. To use transient tools, use `tools`.
     *
     * Both `tools` and `toolIds` can be used together.
     */
    toolIds?: string[];
    /** These are the options for the knowledge base. */
    knowledgeBase?: Vapi.CreateCustomKnowledgeBaseDto;
    /** This is the provider that will be used for the model. */
    provider: Vapi.OpenAiModelProvider;
    /**
     * This is the OpenAI model that will be used.
     *
     * When using Vapi OpenAI or your own Azure Credentials, you have the option to specify the region for the selected model. This shouldn't be specified unless you have a specific reason to do so. Vapi will automatically find the fastest region that makes sense.
     * This is helpful when you are required to comply with Data Residency rules. Learn more about Azure regions here https://azure.microsoft.com/en-us/explore/global-infrastructure/data-residency/.
     *
     * @default undefined
     */
    model: Vapi.OpenAiModelModel;
    /** These are the fallback models that will be used if the primary model fails. This shouldn't be specified unless you have a specific reason to do so. Vapi will automatically find the fastest fallbacks that make sense. */
    fallbackModels?: Vapi.OpenAiModelFallbackModelsItem[];
    /**
     * Azure OpenAI doesn't support `maxLength` right now https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/structured-outputs?tabs=python-secure%2Cdotnet-entra-id&pivots=programming-language-csharp#unsupported-type-specific-keywords. Need to strip.
     *
     * - `strip-parameters-with-unsupported-validation` will strip parameters with unsupported validation.
     * - `strip-unsupported-validation` will keep the parameters but strip unsupported validation.
     *
     * @default `strip-unsupported-validation`
     */
    toolStrictCompatibilityMode?: Vapi.OpenAiModelToolStrictCompatibilityMode;
    /**
     * This controls the prompt cache retention policy for models that support extended caching (GPT-4.1, GPT-5 series).
     *
     * - `in_memory`: Default behavior, cache retained in GPU memory only
     * - `24h`: Extended caching, keeps cached prefixes active for up to 24 hours by offloading to GPU-local storage
     *
     * Only applies to models: gpt-5.2, gpt-5.1, gpt-5.1-codex, gpt-5.1-codex-mini, gpt-5.1-chat-latest, gpt-5, gpt-5-codex, gpt-4.1
     *
     * @default undefined (uses API default which is 'in_memory')
     */
    promptCacheRetention?: Vapi.OpenAiModelPromptCacheRetention;
    /**
     * This is the prompt cache key for models that support extended caching (GPT-4.1, GPT-5 series).
     *
     * Providing a cache key allows you to share cached prefixes across requests.
     *
     * @default undefined
     */
    promptCacheKey?: string;
    /** This is the temperature that will be used for calls. Default is 0 to leverage caching for lower latency. */
    temperature?: number;
    /** This is the max number of tokens that the assistant will be allowed to generate in each turn of the conversation. Default is 250. */
    maxTokens?: number;
    /**
     * This determines whether we detect the user's emotion while they speak and send it as additional info to the model.
     *
     * Default `false` because the model is usually good at understanding the user's emotion from text.
     *
     * @default false
     */
    emotionRecognitionEnabled?: boolean;
    /**
     * This sets how many turns at the start of the conversation to use a smaller, faster model from the same provider before switching to the primary model. For example, gpt-3.5-turbo if provider is openai.
     *
     * Default is 0.
     *
     * @default 0
     */
    numFastTurns?: number;
    /**
     * If specified, the system will make a best effort to sample deterministically,
     * such that repeated requests with the same seed and parameters should return the same result.
     * Determinism is not guaranteed.
     *
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-seed
     */
    seed?: number;
    /**
     * An alternative to sampling with temperature, called nucleus sampling,
     * where the model considers the results of the tokens with top_p probability mass.
     * So 0.1 means only the tokens comprising the top 10% probability mass are considered.
     *
     * We generally recommend altering this or temperature but not both.
     *
     * @default 1
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p
     */
    topP?: number;
    /**
     * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
     * frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
     *
     * @default 0
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty
     */
    frequencyPenalty?: number;
    /**
     * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
     * appear in the text so far, increasing the model's likelihood to talk about new topics.
     *
     * @default 0
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty
     */
    presencePenalty?: number;
    /**
     * Whether to return log probabilities of the output tokens or not.
     * If true, returns the log probabilities of each output token returned in the content of message.
     *
     * @default false
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-logprobs
     */
    logprobs?: boolean;
    /**
     * An integer between 0 and 20 specifying the number of most likely tokens to return at each
     * token position, each with an associated log probability. `logprobs` must be set to true if
     * this parameter is used.
     *
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_logprobs
     */
    topLogprobs?: number;
    /**
     * Whether to enable parallel function calling during tool use.
     * When set to true, the model can call multiple functions in a single response.
     *
     * @default true
     * @see https://platform.openai.com/docs/guides/function-calling#parallel-function-calling
     */
    parallelToolCalls?: boolean;
    /**
     * Constrains effort on reasoning for reasoning models (o1, o3, etc.).
     * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
     *
     * Possible values: "low", "medium", "high"
     *
     * @default "medium"
     * @see https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort
     */
    reasoningEffort?: Vapi.OpenAiModelReasoningEffort;
}