CondenseQuestionChatEngine.ts (forked from run-llama/LlamaIndexTS)

import {
  BaseChatEngine,
  type NonStreamingChatEngineParams,
  type StreamingChatEngineParams,
} from "@llamaindex/core/chat-engine";
import { wrapEventCaller } from "@llamaindex/core/decorator";
import type { ChatMessage, LLM } from "@llamaindex/core/llms";
import { BaseMemory, ChatMemoryBuffer } from "@llamaindex/core/memory";
import {
  type CondenseQuestionPrompt,
  defaultCondenseQuestionPrompt,
  type ModuleRecord,
} from "@llamaindex/core/prompts";
import type { BaseQueryEngine } from "@llamaindex/core/query-engine";
import type { EngineResponse } from "@llamaindex/core/schema";
import {
  extractText,
  messagesToHistory,
  streamReducer,
} from "@llamaindex/core/utils";
import type { ServiceContext } from "../../ServiceContext.js";
import { llmFromSettingsOrContext } from "../../Settings.js";

/**
 * CondenseQuestionChatEngine is used in conjunction with an Index (for example VectorStoreIndex).
 * It performs two steps on each user chat message: first, it condenses the chat message
 * together with the previous chat history into a standalone question with more context.
 * Then, it queries the underlying Index using that condensed question and returns the
 * response.
 * CondenseQuestionChatEngine performs well when the input is primarily questions about the
 * underlying data. It performs less well when the chat messages are not questions about the
 * data, or are very referential to previous context.
 */
export class CondenseQuestionChatEngine extends BaseChatEngine {
  queryEngine: BaseQueryEngine;
  memory: BaseMemory;
  llm: LLM;
  condenseMessagePrompt: CondenseQuestionPrompt;

  get chatHistory() {
    return this.memory.getMessages();
  }

  constructor(init: {
    queryEngine: BaseQueryEngine;
    chatHistory: ChatMessage[];
    serviceContext?: ServiceContext;
    condenseMessagePrompt?: CondenseQuestionPrompt;
  }) {
    super();

    this.queryEngine = init.queryEngine;
    this.memory = new ChatMemoryBuffer({
      chatHistory: init?.chatHistory,
    });
    this.llm = llmFromSettingsOrContext(init?.serviceContext);
    this.condenseMessagePrompt =
      init?.condenseMessagePrompt ?? defaultCondenseQuestionPrompt;
  }

  protected _getPromptModules(): ModuleRecord {
    return {};
  }

  protected _getPrompts(): { condenseMessagePrompt: CondenseQuestionPrompt } {
    return {
      condenseMessagePrompt: this.condenseMessagePrompt,
    };
  }

  protected _updatePrompts(promptsDict: {
    condenseMessagePrompt: CondenseQuestionPrompt;
  }): void {
    if (promptsDict.condenseMessagePrompt) {
      this.condenseMessagePrompt = promptsDict.condenseMessagePrompt;
    }
  }
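
  // Rewrites a follow-up message into a standalone question by formatting the
  // condense prompt with the serialized chat history and asking the LLM to
  // complete it. Illustratively (the exact wording depends on the prompt and
  // the model): given the history "User: Tell me about llamas" and the
  // follow-up "how tall are they?", the condensed question might be something
  // like "How tall are llamas?".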
  private async condenseQuestion(chatHistory: BaseMemory, question: string) {
    const chatHistoryStr = messagesToHistory(await chatHistory.getMessages());

    return this.llm.complete({
      prompt: this.condenseMessagePrompt.format({
        question: question,
        chatHistory: chatHistoryStr,
      }),
    });
  }
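
  // chat() runs the full condense-then-query flow: pick the chat history (the
  // caller-supplied one if present, otherwise this engine's memory), condense
  // the incoming message against that history, send the condensed question to
  // the query engine, and record both the user message and the final answer
  // back into the history. When `stream` is set, the query engine's stream is
  // reduced so the accumulated answer is stored once the stream finishes.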
  chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
  chat(
    params: StreamingChatEngineParams,
  ): Promise<AsyncIterable<EngineResponse>>;
  @wrapEventCaller
  async chat(
    params: NonStreamingChatEngineParams | StreamingChatEngineParams,
  ): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
    const { message, stream } = params;
    const chatHistory = params.chatHistory
      ? new ChatMemoryBuffer({
          chatHistory:
            params.chatHistory instanceof BaseMemory
              ? await params.chatHistory.getMessages()
              : params.chatHistory,
        })
      : this.memory;

    const condensedQuestion = (
      await this.condenseQuestion(chatHistory, extractText(message))
    ).text;
    chatHistory.put({ content: message, role: "user" });

    if (stream) {
      const stream = await this.queryEngine.query({
        query: condensedQuestion,
        stream: true,
      });
      return streamReducer({
        stream,
        initialValue: "",
        reducer: (accumulator, part) =>
          (accumulator += extractText(part.message.content)),
        finished: (accumulator) => {
          chatHistory.put({ content: accumulator, role: "assistant" });
        },
      });
    }

    const response = await this.queryEngine.query({
      query: condensedQuestion,
    });
    chatHistory.put({
      content: response.message.content,
      role: "assistant",
    });
    return response;
  }

  reset() {
    this.memory.reset();
  }
}
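
/*
 * Usage sketch (illustrative only, not part of this module). It assumes the
 * top-level `llamaindex` package re-exports Document, VectorStoreIndex, and
 * CondenseQuestionChatEngine, as in recent releases; adjust the imports to
 * match your install.
 *
 *   import { Document, VectorStoreIndex, CondenseQuestionChatEngine } from "llamaindex";
 *
 *   const index = await VectorStoreIndex.fromDocuments([
 *     new Document({ text: "Llamas typically stand 1.7 to 1.8 m tall." }),
 *   ]);
 *
 *   const chatEngine = new CondenseQuestionChatEngine({
 *     queryEngine: index.asQueryEngine(),
 *     chatHistory: [],
 *   });
 *
 *   // Later turns are condensed against the accumulated history before retrieval.
 *   const response = await chatEngine.chat({ message: "How tall are llamas?" });
 *   console.log(response.message.content);
 */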