// ChatEndpoint.cs
using OpenAI_API.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
namespace OpenAI_API.Chat
{
/// <summary>
/// ChatGPT API endpoint. Use this endpoint to send multiple messages and carry on a conversation.
/// </summary>
public class ChatEndpoint : EndpointBase, IChatEndpoint
{
/// <summary>
/// This allows you to set default parameters for every request, for example to set a default temperature or max tokens. For every request, if you do not have a parameter set on the request but do have it set here as a default, the request will automatically pick up the default value.
/// </summary>
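/// <example>
/// A minimal sketch of setting library-wide defaults (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c> exposing this endpoint as <c>api.Chat</c>):
/// <code>
/// api.Chat.DefaultChatRequestArgs.Temperature = 0.2;
/// api.Chat.DefaultChatRequestArgs.MaxTokens = 256;
/// // Any request that leaves these unset will now pick up the defaults automatically
/// </code>
/// </example>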
public ChatRequest DefaultChatRequestArgs { get; set; } = new ChatRequest() { Model = Model.ChatGPTTurbo };
/// <summary>
/// The name of the endpoint, which is the final path segment in the API URL. For this endpoint, "chat/completions".
/// </summary>
protected override string Endpoint { get { return "chat/completions"; } }
/// <summary>
/// Constructor of the API endpoint. Rather than instantiating this yourself, access it through an instance of <see cref="OpenAIAPI"/> as <see cref="OpenAIAPI.Chat"/>.
/// </summary>
/// <param name="api"></param>
internal ChatEndpoint(OpenAIAPI api) : base(api) { }
/// <summary>
/// Constructor of the API endpoint. Rather than instantiating this yourself, access it through an instance of <see cref="OpenAIAPI"/> as <see cref="OpenAIAPI.Chat"/>.
/// </summary>
/// <param name="httpClient"></param>
/// <param name="api"></param>
internal ChatEndpoint(HttpClient httpClient, OpenAIAPI api) : base(api) { }
/// <summary>
/// Creates an ongoing chat which can easily encapsulate the conversation. This is the simplest way to use the Chat endpoint.
/// </summary>
/// <returns></returns>
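/// <example>
/// A minimal usage sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c>; <c>AppendUserInput</c> and <c>GetResponseFromChatbotAsync</c> are assumed to be the <see cref="Conversation"/> helpers for appending input and fetching the reply):
/// <code>
/// var chat = api.Chat.CreateConversation();
/// chat.AppendUserInput("Is C# a compiled language?");
/// string answer = await chat.GetResponseFromChatbotAsync();
/// </code>
/// </example>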
public Conversation CreateConversation()
{
return new Conversation(this, defaultChatRequestArgs: DefaultChatRequestArgs);
}
#region Non-streaming
/// <summary>
/// Ask the API to complete the request using the specified parameters. This is non-streaming, so it will wait until the API returns the full result. Any non-specified parameters will fall back to default values specified in <see cref="DefaultChatRequestArgs"/> if present.
/// </summary>
/// <param name="request">The request to send to the API.</param>
/// <returns>Asynchronously returns the completion result. Look in its <see cref="ChatResult.Choices"/> property for the results.</returns>
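/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c>; the <c>Message.Content</c> accessor on the returned choice is assumed):
/// <code>
/// var request = new ChatRequest()
/// {
///     Model = Model.ChatGPTTurbo,
///     Temperature = 0.1,
///     Messages = new[] { new ChatMessage(ChatMessageRole.User, "Hello!") }
/// };
/// ChatResult result = await api.Chat.CreateChatCompletionAsync(request);
/// Console.WriteLine(result.Choices[0].Message.Content);
/// </code>
/// </example>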
public async Task<ChatResult> CreateChatCompletionAsync(ChatRequest request)
{
return await HttpPost<ChatResult>(postData: request);
}
/// <summary>
/// Ask the API to complete the request using the specified parameters. This is non-streaming, so it will wait until the API returns the full result. Any non-specified parameters will fall back to default values specified in <see cref="DefaultChatRequestArgs"/> if present.
/// </summary>
/// <param name="request">The request to send to the API.</param>
/// <param name="numOutputs">Overrides <see cref="ChatRequest.NumChoicesPerMessage"/> as a convenience.</param>
/// <returns>Asynchronously returns the completion result. Look in its <see cref="ChatResult.Choices"/> property for the results.</returns>
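/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c> and a previously built <see cref="ChatRequest"/> named <c>request</c>):
/// <code>
/// // Request three alternative completions for the same prompt
/// ChatResult result = await api.Chat.CreateChatCompletionAsync(request, numOutputs: 3);
/// </code>
/// </example>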
public Task<ChatResult> CreateChatCompletionAsync(ChatRequest request, int numOutputs = 5)
{
request.NumChoicesPerMessage = numOutputs;
return CreateChatCompletionAsync(request);
}
/// <summary>
/// Ask the API to complete the request using the specified parameters. This is non-streaming, so it will wait until the API returns the full result. Any non-specified parameters will fall back to default values specified in <see cref="DefaultChatRequestArgs"/> if present.
/// </summary>
/// <param name="messages">The array of messages to send to the API</param>
/// <param name="model">The model to use. See the ChatGPT models available from <see cref="ModelsEndpoint.GetModelsAsync()"/></param>
/// <param name="temperature">What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. It is generally recommend to use this or <paramref name="top_p"/> but not both.</param>
/// <param name="top_p">An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommend to use this or <paramref name="temperature"/> but not both.</param>
/// <param name="numOutputs">How many different choices to request for each prompt.</param>
/// <param name="max_tokens">How many tokens to complete to. Can return fewer if a stop sequence is hit.</param>
/// <param name="frequencyPenalty">The scale of the penalty for how often a token is used. Should generally be between 0 and 1, although negative numbers are allowed to encourage token reuse.</param>
/// <param name="presencePenalty">The scale of the penalty applied if a token is already present at all. Should generally be between 0 and 1, although negative numbers are allowed to encourage token reuse.</param>
/// <param name="logitBias">Maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.</param>
/// <param name="stopSequences">One or more sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.</param>
/// <returns>Asynchronously returns the completion result. Look in its <see cref="ChatResult.Choices"/> property for the results.</returns>
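/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c>):
/// <code>
/// ChatResult result = await api.Chat.CreateChatCompletionAsync(
///     new[] { new ChatMessage(ChatMessageRole.User, "Translate 'good morning' into French.") },
///     model: Model.ChatGPTTurbo,
///     temperature: 0.0,
///     max_tokens: 20);
/// </code>
/// </example>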
public Task<ChatResult> CreateChatCompletionAsync(IList<ChatMessage> messages,
Model model = null,
double? temperature = null,
double? top_p = null,
int? numOutputs = null,
int? max_tokens = null,
double? frequencyPenalty = null,
double? presencePenalty = null,
IReadOnlyDictionary<string, float> logitBias = null,
params string[] stopSequences)
{
ChatRequest request = new ChatRequest(DefaultChatRequestArgs)
{
Messages = messages,
Model = model ?? DefaultChatRequestArgs.Model,
Temperature = temperature ?? DefaultChatRequestArgs.Temperature,
TopP = top_p ?? DefaultChatRequestArgs.TopP,
NumChoicesPerMessage = numOutputs ?? DefaultChatRequestArgs.NumChoicesPerMessage,
MultipleStopSequences = stopSequences ?? DefaultChatRequestArgs.MultipleStopSequences,
MaxTokens = max_tokens ?? DefaultChatRequestArgs.MaxTokens,
FrequencyPenalty = frequencyPenalty ?? DefaultChatRequestArgs.FrequencyPenalty,
PresencePenalty = presencePenalty ?? DefaultChatRequestArgs.PresencePenalty,
LogitBias = logitBias ?? DefaultChatRequestArgs.LogitBias
};
return CreateChatCompletionAsync(request);
}
/// <summary>
/// Ask the API to complete the request using the specified message(s). Any parameters will fall back to default values specified in <see cref="DefaultChatRequestArgs"/> if present.
/// </summary>
/// <param name="messages">The messages to use in the generation.</param>
/// <returns>The <see cref="ChatResult"/> with the API response.</returns>
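/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c>; <c>ChatMessageRole.System</c> is assumed to exist alongside <see cref="ChatMessageRole.User"/>):
/// <code>
/// ChatResult result = await api.Chat.CreateChatCompletionAsync(
///     new ChatMessage(ChatMessageRole.System, "You are a terse assistant."),
///     new ChatMessage(ChatMessageRole.User, "Name one prime number."));
/// </code>
/// </example>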
public Task<ChatResult> CreateChatCompletionAsync(params ChatMessage[] messages)
{
ChatRequest request = new ChatRequest(DefaultChatRequestArgs)
{
Messages = messages
};
return CreateChatCompletionAsync(request);
}
/// <summary>
/// Ask the API to complete the request using the specified message(s). Any parameters will fall back to default values specified in <see cref="DefaultChatRequestArgs"/> if present.
/// </summary>
/// <param name="userMessages">The user message or messages to use in the generation. All strings are assumed to be of Role <see cref="ChatMessageRole.User"/></param>
/// <returns>The <see cref="ChatResult"/> with the API response.</returns>
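/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c>; the <c>Message.Content</c> accessor on the returned choice is assumed):
/// <code>
/// // Each string becomes a user-role message
/// ChatResult result = await api.Chat.CreateChatCompletionAsync("What is 2 + 2?");
/// Console.WriteLine(result.Choices[0].Message.Content);
/// </code>
/// </example>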
public Task<ChatResult> CreateChatCompletionAsync(params string[] userMessages) => CreateChatCompletionAsync(userMessages.Select(m => new ChatMessage(ChatMessageRole.User, m)).ToArray());
#endregion
#region Streaming
/// <summary>
/// Ask the API to complete the message(s) using the specified request, and stream the results to the <paramref name="resultHandler"/> as they come in.
/// If you are on the latest C# supporting async enumerables, you may prefer the cleaner syntax of <see cref="StreamChatEnumerableAsync(ChatRequest)"/> instead.
/// </summary>
/// <param name="request">The request to send to the API. This does not fall back to default values specified in <see cref="DefaultChatRequestArgs"/>.</param>
/// <param name="resultHandler">An action to be called as each new result arrives, which includes the index of the result in the overall result set.</param>
public async Task StreamCompletionAsync(ChatRequest request, Action<int, ChatResult> resultHandler)
{
int index = 0;
await foreach (var res in StreamChatEnumerableAsync(request))
{
resultHandler(index++, res);
}
}
/// <summary>
/// Ask the API to complete the message(s) using the specified request, and stream the results to the <paramref name="resultHandler"/> as they come in.
/// If you are on the latest C# supporting async enumerables, you may prefer the cleaner syntax of <see cref="StreamChatEnumerableAsync(ChatRequest)"/> instead.
/// </summary>
/// <param name="request">The request to send to the API. This does not fall back to default values specified in <see cref="DefaultChatRequestArgs"/>.</param>
/// <param name="resultHandler">An action to be called as each new result arrives.</param>
public async Task StreamChatAsync(ChatRequest request, Action<ChatResult> resultHandler)
{
await foreach (var res in StreamChatEnumerableAsync(request))
{
resultHandler(res);
}
}
/// <summary>
/// Ask the API to complete the message(s) using the specified request, and stream the results as they come in.
/// If you are not using C# 8 supporting async enumerables or if you are using the .NET Framework, you may need to use <see cref="StreamChatAsync(ChatRequest, Action{ChatResult})"/> instead.
/// </summary>
/// <param name="request">The request to send to the API. This does not fall back to default values specified in <see cref="DefaultChatRequestArgs"/>.</param>
/// <returns>An async enumerable with each of the results as they come in. See <see href="https://docs.microsoft.com/en-us/dotnet/csharp/whats-new/csharp-8#asynchronous-streams"/> for more details on how to consume an async enumerable.</returns>
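/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c>, a previously built <see cref="ChatRequest"/> named <c>request</c>, and a <c>Delta</c> accessor on each streamed choice):
/// <code>
/// await foreach (ChatResult result in api.Chat.StreamChatEnumerableAsync(request))
/// {
///     Console.Write(result.Choices[0].Delta?.Content);
/// }
/// </code>
/// </example>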
public IAsyncEnumerable<ChatResult> StreamChatEnumerableAsync(ChatRequest request)
{
request = new ChatRequest(request) { Stream = true };
return HttpStreamingRequest<ChatResult>(Url, HttpMethod.Post, request);
}
/// <summary>
/// Ask the API to complete the message(s) using the specified request, and stream the results as they come in.
/// If you are not using C# 8 supporting async enumerables or if you are using the .NET Framework, you may need to use <see cref="StreamChatAsync(ChatRequest, Action{ChatResult})"/> instead.
/// </summary>
/// <param name="messages">The array of messages to send to the API</param>
/// <param name="model">The model to use. See the ChatGPT models available from <see cref="ModelsEndpoint.GetModelsAsync()"/></param>
/// <param name="temperature">What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. It is generally recommend to use this or <paramref name="top_p"/> but not both.</param>
/// <param name="top_p">An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. It is generally recommend to use this or <paramref name="temperature"/> but not both.</param>
/// <param name="numOutputs">How many different choices to request for each prompt.</param>
/// <param name="max_tokens">How many tokens to complete to. Can return fewer if a stop sequence is hit.</param>
/// <param name="frequencyPenalty">The scale of the penalty for how often a token is used. Should generally be between 0 and 1, although negative numbers are allowed to encourage token reuse.</param>
/// <param name="presencePenalty">The scale of the penalty applied if a token is already present at all. Should generally be between 0 and 1, although negative numbers are allowed to encourage token reuse.</param>
/// <param name="logitBias">Maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.</param>
/// <param name="stopSequences">One or more sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.</param>
/// <returns>An async enumerable with each of the results as they come in. See <see href="https://docs.microsoft.com/en-us/dotnet/csharp/whats-new/csharp-8#asynchronous-streams">the C# docs</see> for more details on how to consume an async enumerable.</returns>
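/// <example>
/// A minimal sketch (assumes an <see cref="OpenAIAPI"/> instance named <c>api</c> and a <c>Delta</c> accessor on each streamed choice):
/// <code>
/// await foreach (ChatResult result in api.Chat.StreamChatEnumerableAsync(
///     new[] { new ChatMessage(ChatMessageRole.User, "Write a haiku about the sea.") },
///     temperature: 0.8,
///     max_tokens: 60))
/// {
///     Console.Write(result.Choices[0].Delta?.Content);
/// }
/// </code>
/// </example>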
public IAsyncEnumerable<ChatResult> StreamChatEnumerableAsync(IList<ChatMessage> messages,
Model model = null,
double? temperature = null,
double? top_p = null,
int? numOutputs = null,
int? max_tokens = null,
double? frequencyPenalty = null,
double? presencePenalty = null,
IReadOnlyDictionary<string, float> logitBias = null,
params string[] stopSequences)
{
ChatRequest request = new ChatRequest(DefaultChatRequestArgs)
{
Messages = messages,
Model = model ?? DefaultChatRequestArgs.Model,
Temperature = temperature ?? DefaultChatRequestArgs.Temperature,
TopP = top_p ?? DefaultChatRequestArgs.TopP,
NumChoicesPerMessage = numOutputs ?? DefaultChatRequestArgs.NumChoicesPerMessage,
MultipleStopSequences = stopSequences ?? DefaultChatRequestArgs.MultipleStopSequences,
MaxTokens = max_tokens ?? DefaultChatRequestArgs.MaxTokens,
FrequencyPenalty = frequencyPenalty ?? DefaultChatRequestArgs.FrequencyPenalty,
PresencePenalty = presencePenalty ?? DefaultChatRequestArgs.PresencePenalty,
LogitBias = logitBias ?? DefaultChatRequestArgs.LogitBias
};
return StreamChatEnumerableAsync(request);
}
#endregion
}
}