Skip to content

Commit 7f7cdc3

Browse files
committed
DEV: Use structured responses for summaries
1 parent f9d641d commit 7f7cdc3

File tree

12 files changed

+124
-49
lines changed

12 files changed

+124
-49
lines changed

lib/completions/llm.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ def initialize(dialect_klass, gateway_klass, llm_model, gateway: nil)
241241
# @param feature_context { Hash - Optional } - The feature context to use for the completion.
242242
# @param partial_tool_calls { Boolean - Optional } - If true, the completion will return partial tool calls.
243243
# @param output_thinking { Boolean - Optional } - If true, the completion will return the thinking output for thinking models.
244+
# @param extra_model_params { Hash - Optional } - Other params that are not available across models. e.g. response_format JSON schema.
244245
#
245246
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
246247
#

lib/personas/bot.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ def reply(context, llm_args: {}, &update_blk)
6464

6565
user = context.user
6666

67-
llm_kwargs = { user: user }
67+
llm_kwargs = llm_args.dup
68+
llm_kwargs[:user] = user
6869
llm_kwargs[:temperature] = persona.temperature if persona.temperature
6970
llm_kwargs[:top_p] = persona.top_p if persona.top_p
70-
llm_kwargs[:max_tokens] = llm_args[:max_tokens] if llm_args[:max_tokens].present?
7171

7272
needs_newlines = false
7373
tools_ran = 0

lib/personas/short_summarizer.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@ def system_prompt
1818
- Limit the summary to a maximum of 40 words.
1919
- Do *NOT* repeat the discussion title in the summary.
2020
21-
Return the summary inside <ai></ai> tags.
21+
Format your response as a JSON object with a single key named "summary", which has the summary as the value.
22+
Your output should be in the following format:
23+
<output>
24+
{"summary": "xx"}
25+
</output>
26+
27+
Where "xx" is replaced by the summary.
2228
PROMPT
2329
end
2430
end

lib/personas/summarizer.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ def system_prompt
1818
- Example: link to the 6th post by jane: [agreed with]({resource_url}/6)
1919
- Example: link to the 13th post by joe: [joe]({resource_url}/13)
2020
- When formatting usernames either use @USERNAME OR [USERNAME]({resource_url}/POST_NUMBER)
21+
22+
Format your response as a JSON object with a single key named "summary", which has the summary as the value.
23+
Your output should be in the following format:
24+
<output>
25+
{"summary": "xx"}
26+
</output>
27+
28+
Where "xx" is replaced by the summary.
2129
PROMPT
2230
end
2331
end

lib/summarization/fold_content.rb

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,14 @@ def initialize(bot, strategy, persist_summaries: true)
2727
def summarize(user, &on_partial_blk)
2828
truncated_content = content_to_summarize.map { |cts| truncate(cts) }
2929

30-
summary = fold(truncated_content, user, &on_partial_blk)
31-
32-
clean_summary = Nokogiri::HTML5.fragment(summary).css("ai")&.first&.text || summary
30+
# Done here to cover non-streaming mode.
31+
json_reply_end = "\"}"
32+
summary = fold(truncated_content, user, &on_partial_blk).chomp(json_reply_end)
3333

3434
if persist_summaries
35-
AiSummary.store!(
36-
strategy,
37-
llm_model,
38-
clean_summary,
39-
truncated_content,
40-
human: user&.human?,
41-
)
35+
AiSummary.store!(strategy, llm_model, summary, truncated_content, human: user&.human?)
4236
else
43-
AiSummary.new(summarized_text: clean_summary)
37+
AiSummary.new(summarized_text: summary)
4438
end
4539
end
4640

@@ -118,17 +112,40 @@ def fold(items, user, &on_partial_blk)
118112
)
119113

120114
summary = +""
115+
# Auxiliary variables to get the summary content from the JSON response.
116+
raw_buffer = +""
117+
json_start_found = false
118+
json_reply_start_regex = /\{\s*"summary"\s*:\s*"/
119+
unescape_regex = %r{\\(["/bfnrt])}
120+
json_reply_end = "\"}"
121+
121122
buffer_blk =
122-
Proc.new do |partial, cancel, placeholder, type|
123+
Proc.new do |partial, cancel, _, type|
123124
if type.blank?
124-
summary << partial
125-
on_partial_blk.call(partial, cancel) if on_partial_blk
125+
if json_start_found
126+
unescaped_partial = partial.gsub(unescape_regex, '\1')
127+
summary << unescaped_partial
128+
129+
on_partial_blk.call(partial, cancel) if on_partial_blk
130+
else
131+
raw_buffer << partial
132+
133+
if raw_buffer.match?(json_reply_start_regex)
134+
buffered_start =
135+
raw_buffer.gsub(json_reply_start_regex, "").gsub(unescape_regex, '\1')
136+
summary << buffered_start
137+
138+
on_partial_blk.call(buffered_start, cancel) if on_partial_blk
139+
140+
json_start_found = true
141+
end
142+
end
126143
end
127144
end
128145

129-
bot.reply(context, &buffer_blk)
146+
bot.reply(context, llm_args: { extra_model_params: response_format }, &buffer_blk)
130147

131-
summary
148+
summary.chomp(json_reply_end)
132149
end
133150

134151
def available_tokens
@@ -155,10 +172,26 @@ def truncate(item)
155172
item
156173
end
157174

158-
def text_only_update(&on_partial_blk)
159-
Proc.new do |partial, cancel, placeholder, type|
160-
on_partial_blk.call(partial, cancel) if type.blank?
161-
end
175+
def response_format
176+
{
177+
response_format: {
178+
type: "json_schema",
179+
json_schema: {
180+
name: "reply",
181+
schema: {
182+
type: "object",
183+
properties: {
184+
summary: {
185+
type: "string",
186+
},
187+
},
188+
required: ["summary"],
189+
additionalProperties: false,
190+
},
191+
strict: true,
192+
},
193+
},
194+
}
162195
end
163196
end
164197
end

spec/jobs/regular/fast_track_topic_gist_spec.rb

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@
2121
created_at: 10.minutes.ago,
2222
)
2323
end
24+
2425
let(:updated_gist) { "They updated me :(" }
2526

27+
def in_json_format(summary)
28+
"{\"summary\":\"#{summary}\"}"
29+
end
30+
2631
context "when it's up to date" do
2732
it "does nothing" do
28-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do
33+
DiscourseAi::Completions::Llm.with_prepared_responses([in_json_format(updated_gist)]) do
2934
subject.execute(topic_id: topic_1.id)
3035
end
3136

@@ -39,7 +44,7 @@
3944
before { Fabricate(:post, topic: topic_1, post_number: 3) }
4045

4146
it "regenerates the gist using the latest data" do
42-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do
47+
DiscourseAi::Completions::Llm.with_prepared_responses([in_json_format(updated_gist)]) do
4348
subject.execute(topic_id: topic_1.id)
4449
end
4550

@@ -52,7 +57,7 @@
5257
it "does nothing if the gist was created less than 5 minutes ago" do
5358
ai_gist.update!(created_at: 2.minutes.ago)
5459

55-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do
60+
DiscourseAi::Completions::Llm.with_prepared_responses([in_json_format(updated_gist)]) do
5661
subject.execute(topic_id: topic_1.id)
5762
end
5863

spec/jobs/regular/stream_topic_ai_summary_spec.rb

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,31 @@ def with_responses(responses)
5050
end
5151
end
5252

53+
def in_json_format(summary)
54+
"{\"summary\":\"#{summary}\"}"
55+
end
56+
5357
it "publishes updates with a partial summary" do
54-
with_responses(["dummy"]) do
58+
summary = "dummy"
59+
60+
with_responses([in_json_format(summary)]) do
5561
messages =
5662
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
5763
job.execute(topic_id: topic.id, user_id: user.id)
5864
end
5965

6066
partial_summary_update = messages.first.data
6167
expect(partial_summary_update[:done]).to eq(false)
62-
expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text)).to eq("dummy")
68+
expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text).chomp("\"}")).to eq(
69+
summary,
70+
)
6371
end
6472
end
6573

6674
it "publishes a final update to signal we're done and provide metadata" do
67-
with_responses(["dummy"]) do
75+
summary = "dummy"
76+
77+
with_responses([in_json_format(summary)]) do
6878
messages =
6979
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
7080
job.execute(topic_id: topic.id, user_id: user.id)
@@ -73,6 +83,7 @@ def with_responses(responses)
7383
final_update = messages.last.data
7484
expect(final_update[:done]).to eq(true)
7585

86+
expect(final_update.dig(:ai_topic_summary, :summarized_text)).to eq(summary)
7687
expect(final_update.dig(:ai_topic_summary, :algorithm)).to eq("fake")
7788
expect(final_update.dig(:ai_topic_summary, :outdated)).to eq(false)
7889
expect(final_update.dig(:ai_topic_summary, :can_regenerate)).to eq(true)

spec/jobs/scheduled/summaries_backfill_spec.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@
8484
end
8585
end
8686

87+
def in_json_format(summary)
88+
"{\"summary\":\"#{summary}\"}"
89+
end
90+
8791
describe "#execute" do
8892
it "backfills a batch" do
8993
topic_2 =
@@ -98,7 +102,7 @@
98102
gist_2 = "Updated gist of topic"
99103

100104
DiscourseAi::Completions::Llm.with_prepared_responses(
101-
[gist_1, gist_2, summary_1, summary_2],
105+
[gist_1, gist_2, summary_1, summary_2].map { |s| in_json_format(s) },
102106
) { subject.execute({}) }
103107

104108
expect(AiSummary.complete.find_by(target: topic_2).summarized_text).to eq(summary_1)

spec/lib/modules/summarization/fold_content_spec.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
llm_model.update!(max_prompt_tokens: model_tokens)
2727
end
2828

29-
let(:single_summary) { "this is a summary" }
29+
let(:single_summary) { "{\"summary\":\"#{clean_summary}\"}" }
30+
let(:clean_summary) { "this is a summary" }
3031

3132
fab!(:user)
3233

@@ -36,7 +37,7 @@
3637
summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
3738
end
3839

39-
expect(result.summarized_text).to eq(single_summary)
40+
expect(result.summarized_text).to eq(clean_summary)
4041
end
4142
end
4243

spec/requests/summarization/summary_controller_spec.rb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,16 @@
7373
end
7474

7575
it "returns a summary" do
76-
summary_text = "This is a summary"
76+
clean_summary = "This is a summary"
77+
summary_text = "{\"summary\":\"#{clean_summary}\"}"
7778
DiscourseAi::Completions::Llm.with_prepared_responses([summary_text]) do
7879
get "/discourse-ai/summarization/t/#{topic.id}.json"
7980

8081
expect(response.status).to eq(200)
8182
response_summary = response.parsed_body["ai_topic_summary"]
8283
summary = AiSummary.last
8384

84-
expect(summary.summarized_text).to eq(summary_text)
85+
expect(summary.summarized_text).to eq(clean_summary)
8586
expect(response_summary["summarized_text"]).to eq(summary.summarized_text)
8687
expect(response_summary["algorithm"]).to eq("fake")
8788
expect(response_summary["outdated"]).to eq(false)

0 commit comments

Comments
 (0)