Skip to content

Commit 7f7cdc3

Browse files
committed
DEV: Use structured responses for summaries
1 parent f9d641d commit 7f7cdc3

File tree

12 files changed

+124
-49
lines changed

12 files changed

+124
-49
lines changed

lib/completions/llm.rb

+1
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ def initialize(dialect_klass, gateway_klass, llm_model, gateway: nil)
241241
# @param feature_context { Hash - Optional } - The feature context to use for the completion.
242242
# @param partial_tool_calls { Boolean - Optional } - If true, the completion will return partial tool calls.
243243
# @param output_thinking { Boolean - Optional } - If true, the completion will return the thinking output for thinking models.
244+
# @param extra_model_params { Hash - Optional } - Other params that are not available across models. e.g. response_format JSON schema.
244245
#
245246
# @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
246247
#

lib/personas/bot.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ def reply(context, llm_args: {}, &update_blk)
6464

6565
user = context.user
6666

67-
llm_kwargs = { user: user }
67+
llm_kwargs = llm_args.dup
68+
llm_kwargs[:user] = user
6869
llm_kwargs[:temperature] = persona.temperature if persona.temperature
6970
llm_kwargs[:top_p] = persona.top_p if persona.top_p
70-
llm_kwargs[:max_tokens] = llm_args[:max_tokens] if llm_args[:max_tokens].present?
7171

7272
needs_newlines = false
7373
tools_ran = 0

lib/personas/short_summarizer.rb

+7-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@ def system_prompt
1818
- Limit the summary to a maximum of 40 words.
1919
- Do *NOT* repeat the discussion title in the summary.
2020
21-
Return the summary inside <ai></ai> tags.
21+
Format your response as a JSON object with a single key named "summary", which has the summary as the value.
22+
Your output should be in the following format:
23+
<output>
24+
{"summary": "xx"}
25+
</output>
26+
27+
Where "xx" is replaced by the summary.
2228
PROMPT
2329
end
2430
end

lib/personas/summarizer.rb

+8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,14 @@ def system_prompt
1818
- Example: link to the 6th post by jane: [agreed with]({resource_url}/6)
1919
- Example: link to the 13th post by joe: [joe]({resource_url}/13)
2020
- When formatting usernames either use @USERNAME OR [USERNAME]({resource_url}/POST_NUMBER)
21+
22+
Format your response as a JSON object with a single key named "summary", which has the summary as the value.
23+
Your output should be in the following format:
24+
<output>
25+
{"summary": "xx"}
26+
</output>
27+
28+
Where "xx" is replaced by the summary.
2129
PROMPT
2230
end
2331
end

lib/summarization/fold_content.rb

+53-20
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,14 @@ def initialize(bot, strategy, persist_summaries: true)
2727
def summarize(user, &on_partial_blk)
2828
truncated_content = content_to_summarize.map { |cts| truncate(cts) }
2929

30-
summary = fold(truncated_content, user, &on_partial_blk)
31-
32-
clean_summary = Nokogiri::HTML5.fragment(summary).css("ai")&.first&.text || summary
30+
# Done here to cover non-streaming mode.
31+
json_reply_end = "\"}"
32+
summary = fold(truncated_content, user, &on_partial_blk).chomp(json_reply_end)
3333

3434
if persist_summaries
35-
AiSummary.store!(
36-
strategy,
37-
llm_model,
38-
clean_summary,
39-
truncated_content,
40-
human: user&.human?,
41-
)
35+
AiSummary.store!(strategy, llm_model, summary, truncated_content, human: user&.human?)
4236
else
43-
AiSummary.new(summarized_text: clean_summary)
37+
AiSummary.new(summarized_text: summary)
4438
end
4539
end
4640

@@ -118,17 +112,40 @@ def fold(items, user, &on_partial_blk)
118112
)
119113

120114
summary = +""
115+
# Auxiliary variables to get the summary content from the JSON response.
116+
raw_buffer = +""
117+
json_start_found = false
118+
json_reply_start_regex = /\{\s*"summary"\s*:\s*"/
119+
unescape_regex = %r{\\(["/bfnrt])}
120+
json_reply_end = "\"}"
121+
121122
buffer_blk =
122-
Proc.new do |partial, cancel, placeholder, type|
123+
Proc.new do |partial, cancel, _, type|
123124
if type.blank?
124-
summary << partial
125-
on_partial_blk.call(partial, cancel) if on_partial_blk
125+
if json_start_found
126+
unescaped_partial = partial.gsub(unescape_regex, '\1')
127+
summary << unescaped_partial
128+
129+
on_partial_blk.call(partial, cancel) if on_partial_blk
130+
else
131+
raw_buffer << partial
132+
133+
if raw_buffer.match?(json_reply_start_regex)
134+
buffered_start =
135+
raw_buffer.gsub(json_reply_start_regex, "").gsub(unescape_regex, '\1')
136+
summary << buffered_start
137+
138+
on_partial_blk.call(buffered_start, cancel) if on_partial_blk
139+
140+
json_start_found = true
141+
end
142+
end
126143
end
127144
end
128145

129-
bot.reply(context, &buffer_blk)
146+
bot.reply(context, llm_args: { extra_model_params: response_format }, &buffer_blk)
130147

131-
summary
148+
summary.chomp(json_reply_end)
132149
end
133150

134151
def available_tokens
@@ -155,10 +172,26 @@ def truncate(item)
155172
item
156173
end
157174

158-
def text_only_update(&on_partial_blk)
159-
Proc.new do |partial, cancel, placeholder, type|
160-
on_partial_blk.call(partial, cancel) if type.blank?
161-
end
175+
def response_format
176+
{
177+
response_format: {
178+
type: "json_schema",
179+
json_schema: {
180+
name: "reply",
181+
schema: {
182+
type: "object",
183+
properties: {
184+
summary: {
185+
type: "string",
186+
},
187+
},
188+
required: ["summary"],
189+
additionalProperties: false,
190+
},
191+
strict: true,
192+
},
193+
},
194+
}
162195
end
163196
end
164197
end

spec/jobs/regular/fast_track_topic_gist_spec.rb

+8-3
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@
2121
created_at: 10.minutes.ago,
2222
)
2323
end
24+
2425
let(:updated_gist) { "They updated me :(" }
2526

27+
def in_json_format(summary)
28+
"{\"summary\":\"#{summary}\"}"
29+
end
30+
2631
context "when it's up to date" do
2732
it "does nothing" do
28-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do
33+
DiscourseAi::Completions::Llm.with_prepared_responses([in_json_format(updated_gist)]) do
2934
subject.execute(topic_id: topic_1.id)
3035
end
3136

@@ -39,7 +44,7 @@
3944
before { Fabricate(:post, topic: topic_1, post_number: 3) }
4045

4146
it "regenerates the gist using the latest data" do
42-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do
47+
DiscourseAi::Completions::Llm.with_prepared_responses([in_json_format(updated_gist)]) do
4348
subject.execute(topic_id: topic_1.id)
4449
end
4550

@@ -52,7 +57,7 @@
5257
it "does nothing if the gist was created less than 5 minutes ago" do
5358
ai_gist.update!(created_at: 2.minutes.ago)
5459

55-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_gist]) do
60+
DiscourseAi::Completions::Llm.with_prepared_responses([in_json_format(updated_gist)]) do
5661
subject.execute(topic_id: topic_1.id)
5762
end
5863

spec/jobs/regular/stream_topic_ai_summary_spec.rb

+14-3
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,31 @@ def with_responses(responses)
5050
end
5151
end
5252

53+
def in_json_format(summary)
54+
"{\"summary\":\"#{summary}\"}"
55+
end
56+
5357
it "publishes updates with a partial summary" do
54-
with_responses(["dummy"]) do
58+
summary = "dummy"
59+
60+
with_responses([in_json_format(summary)]) do
5561
messages =
5662
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
5763
job.execute(topic_id: topic.id, user_id: user.id)
5864
end
5965

6066
partial_summary_update = messages.first.data
6167
expect(partial_summary_update[:done]).to eq(false)
62-
expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text)).to eq("dummy")
68+
expect(partial_summary_update.dig(:ai_topic_summary, :summarized_text).chomp("\"}")).to eq(
69+
summary,
70+
)
6371
end
6472
end
6573

6674
it "publishes a final update to signal we're done and provide metadata" do
67-
with_responses(["dummy"]) do
75+
summary = "dummy"
76+
77+
with_responses([in_json_format(summary)]) do
6878
messages =
6979
MessageBus.track_publish("/discourse-ai/summaries/topic/#{topic.id}") do
7080
job.execute(topic_id: topic.id, user_id: user.id)
@@ -73,6 +83,7 @@ def with_responses(responses)
7383
final_update = messages.last.data
7484
expect(final_update[:done]).to eq(true)
7585

86+
expect(final_update.dig(:ai_topic_summary, :summarized_text)).to eq(summary)
7687
expect(final_update.dig(:ai_topic_summary, :algorithm)).to eq("fake")
7788
expect(final_update.dig(:ai_topic_summary, :outdated)).to eq(false)
7889
expect(final_update.dig(:ai_topic_summary, :can_regenerate)).to eq(true)

spec/jobs/scheduled/summaries_backfill_spec.rb

+5-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@
8484
end
8585
end
8686

87+
def in_json_format(summary)
88+
"{\"summary\":\"#{summary}\"}"
89+
end
90+
8791
describe "#execute" do
8892
it "backfills a batch" do
8993
topic_2 =
@@ -98,7 +102,7 @@
98102
gist_2 = "Updated gist of topic"
99103

100104
DiscourseAi::Completions::Llm.with_prepared_responses(
101-
[gist_1, gist_2, summary_1, summary_2],
105+
[gist_1, gist_2, summary_1, summary_2].map { |s| in_json_format(s) },
102106
) { subject.execute({}) }
103107

104108
expect(AiSummary.complete.find_by(target: topic_2).summarized_text).to eq(summary_1)

spec/lib/modules/summarization/fold_content_spec.rb

+3-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
llm_model.update!(max_prompt_tokens: model_tokens)
2727
end
2828

29-
let(:single_summary) { "this is a summary" }
29+
let(:single_summary) { "{\"summary\":\"#{clean_summary}\"}" }
30+
let(:clean_summary) { "this is a summary" }
3031

3132
fab!(:user)
3233

@@ -36,7 +37,7 @@
3637
summarizer.summarize(user).tap { expect(spy.completions).to eq(1) }
3738
end
3839

39-
expect(result.summarized_text).to eq(single_summary)
40+
expect(result.summarized_text).to eq(clean_summary)
4041
end
4142
end
4243

spec/requests/summarization/summary_controller_spec.rb

+3-2
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,16 @@
7373
end
7474

7575
it "returns a summary" do
76-
summary_text = "This is a summary"
76+
clean_summary = "This is a summary"
77+
summary_text = "{\"summary\":\"#{clean_summary}\"}"
7778
DiscourseAi::Completions::Llm.with_prepared_responses([summary_text]) do
7879
get "/discourse-ai/summarization/t/#{topic.id}.json"
7980

8081
expect(response.status).to eq(200)
8182
response_summary = response.parsed_body["ai_topic_summary"]
8283
summary = AiSummary.last
8384

84-
expect(summary.summarized_text).to eq(summary_text)
85+
expect(summary.summarized_text).to eq(clean_summary)
8586
expect(response_summary["summarized_text"]).to eq(summary.summarized_text)
8687
expect(response_summary["algorithm"]).to eq("fake")
8788
expect(response_summary["outdated"]).to eq(false)

spec/services/discourse_ai/topic_summarization_spec.rb

+16-13
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313

1414
let(:strategy) { DiscourseAi::Summarization.topic_summary(topic) }
1515

16+
let(:raw_summary) { "{\"summary\":\"#{clean_summary}\"}" }
17+
let(:clean_summary) { "This is the final summary" }
18+
1619
describe "#summarize" do
1720
subject(:summarization) { described_class.new(strategy, user) }
1821

@@ -30,10 +33,10 @@ def assert_summary_is_cached(topic, summary_response)
3033
let(:summary) { "This is the final summary" }
3134

3235
it "caches the summary" do
33-
DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
36+
DiscourseAi::Completions::Llm.with_prepared_responses([raw_summary]) do
3437
section = summarization.summarize
35-
expect(section.summarized_text).to eq(summary)
36-
assert_summary_is_cached(topic, summary)
38+
expect(section.summarized_text).to eq(clean_summary)
39+
assert_summary_is_cached(topic, clean_summary)
3740
end
3841
end
3942

@@ -54,7 +57,6 @@ def assert_summary_is_cached(topic, summary_response)
5457

5558
describe "invalidating cached summaries" do
5659
let(:cached_text) { "This is a cached summary" }
57-
let(:updated_summary) { "This is the final summary" }
5860

5961
def cached_summary
6062
AiSummary.find_by(target: topic, summary_type: AiSummary.summary_types[:complete])
@@ -65,7 +67,9 @@ def cached_summary
6567
# once it is cached with_prepared_responses will not work as expected
6668
# since it is glued to the old llm instance
6769
# so we create the cached summary totally independently
68-
DiscourseAi::Completions::Llm.with_prepared_responses([cached_text]) do
70+
DiscourseAi::Completions::Llm.with_prepared_responses(
71+
["{\"summary\": \"#{cached_text}\"}"],
72+
) do
6973
strategy = DiscourseAi::Summarization.topic_summary(topic)
7074
described_class.new(strategy, user).summarize
7175
end
@@ -86,10 +90,10 @@ def cached_summary
8690
before { cached_summary.update!(original_content_sha: "outdated_sha") }
8791

8892
it "returns a new summary" do
89-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
93+
DiscourseAi::Completions::Llm.with_prepared_responses([raw_summary]) do
9094
section = summarization.summarize
9195

92-
expect(section.summarized_text).to eq(updated_summary)
96+
expect(section.summarized_text).to eq(clean_summary)
9397
end
9498
end
9599

@@ -106,10 +110,10 @@ def cached_summary
106110
end
107111

108112
it "returns a new summary if the skip_age_check flag is passed" do
109-
DiscourseAi::Completions::Llm.with_prepared_responses([updated_summary]) do
113+
DiscourseAi::Completions::Llm.with_prepared_responses([raw_summary]) do
110114
section = summarization.summarize(skip_age_check: true)
111115

112-
expect(section.summarized_text).to eq(updated_summary)
116+
expect(section.summarized_text).to eq(clean_summary)
113117
end
114118
end
115119
end
@@ -118,16 +122,15 @@ def cached_summary
118122
end
119123

120124
describe "stream partial updates" do
121-
let(:summary) { "This is the final summary" }
122-
123125
it "receives a blk that is passed to the underlying strategy and called with partial summaries" do
124126
partial_result = +""
125127

126-
DiscourseAi::Completions::Llm.with_prepared_responses([summary]) do
128+
DiscourseAi::Completions::Llm.with_prepared_responses([raw_summary]) do
127129
summarization.summarize { |partial_summary| partial_result << partial_summary }
128130
end
129131

130-
expect(partial_result).to eq(summary)
132+
# In a real world example, this is removed in the returned AiSummary obj.
133+
expect(partial_result.chomp("\"}")).to eq(clean_summary)
131134
end
132135
end
133136
end

spec/system/summarization/topic_summarization_spec.rb

+4-2
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
"I like to eat pie. It is a very good dessert. Some people are wasteful by throwing pie at others but I do not do that. I always eat the pie.",
1313
)
1414
end
15-
let(:summarization_result) { "This is a summary" }
15+
16+
let(:clean_summary) { "This is a summary" }
17+
1618
let(:topic_page) { PageObjects::Pages::Topic.new }
1719
let(:summary_box) { PageObjects::Components::AiSummaryTrigger.new }
1820

19-
fab!(:ai_summary) { Fabricate(:ai_summary, target: topic, summarized_text: "This is a summary") }
21+
fab!(:ai_summary) { Fabricate(:ai_summary, target: topic, summarized_text: clean_summary) }
2022

2123
before do
2224
group.add(current_user)

0 commit comments

Comments
 (0)