Skip to content

Commit 262a0a4

Browse files
authored
Merge pull request #33 from merefield/semantic_search
FEATURE: Semantic Search
2 parents 5d8c2c4 + 281ded4 commit 262a0a4

23 files changed

+548
-83
lines changed

.github/workflows/plugin-tests.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ jobs:
5959
sudo -E -u postgres script/start_test_db.rb
6060
sudo -u postgres psql -c "CREATE ROLE $PGUSER LOGIN SUPERUSER PASSWORD '$PGPASSWORD';"
6161
62+
- name: Install pg_embedding
63+
run: |
64+
sudo apt-get update
65+
sudo apt-get -y install -y postgresql-server-dev-13
66+
git clone https://github.com/neondatabase/pg_embedding.git
67+
cd pg_embedding
68+
make PG_CONFIG=/usr/lib/postgresql/13/bin/pg_config
69+
make PG_CONFIG=/usr/lib/postgresql/13/bin/pg_config install
70+
6271
- name: Bundler cache
6372
uses: actions/cache@v3
6473
with:
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# frozen_string_literal: true
2+
3+
# Job is triggered on a Post destruction.
4+
# Removes the stored embedding that belongs to a destroyed Post.
#
# The job is configured with `retry: false`, so it runs at most once; any
# failure is logged and swallowed (best effort).
class ::Jobs::ChatbotPostEmbeddingDeleteJob < Jobs::Base
  sidekiq_options retry: false

  # @param opts [Hash] job arguments; `opts[:id]` is the id of the Post whose
  #   embedding should be removed.
  # @return [void]
  def execute(opts)
    post_id = opts[:id]

    ::DiscourseChatbot.progress_debug_message("101. Deleting a Post Embedding for Post id: #{post_id}")

    # A Post may have no embedding row; `find_by` then returns nil, so use
    # safe navigation instead of raising NoMethodError on nil.
    ::DiscourseChatbot::PostEmbedding.find_by(post_id: post_id)&.destroy!
  rescue => e
    # The error is swallowed and the job has `retry: false`, so no retry happens.
    Rails.logger.error("OpenAIBot Post Embedding: There was a problem deleting the embedding, the job will not be retried: #{e}")
  end
end
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# frozen_string_literal: true
2+
3+
# Job is triggered on an update to a Post.
4+
# Creates or refreshes the embedding for a Post after the Post is updated.
#
# Configured with `retry: 5, dead: false`. For those retries to actually
# happen, a failure must propagate out of `execute`, so the error is logged
# and then re-raised.
class ::Jobs::ChatbotPostEmbeddingJob < Jobs::Base
  sidekiq_options retry: 5, dead: false

  # @param opts [Hash] job arguments; `opts[:id]` is the id of the Post to embed.
  # @return [void]
  # @raise [StandardError] re-raises whatever the upsert raised, so the
  #   configured retry policy can take effect.
  def execute(opts)
    post_id = opts[:id]

    ::DiscourseChatbot.progress_debug_message("100. Creating/updating a Post Embedding for Post id: #{post_id}")

    ::DiscourseChatbot::PostEmbeddingProcess.new.upsert_embedding(post_id)
  rescue => e
    Rails.logger.error("OpenAIBot Post Embedding: There was a problem, but will retry til limit: #{e}")
    # Swallowing the error here would mark the job as successful and the
    # `retry: 5` option would never be used.
    raise
  end
end

app/models/embedding.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# frozen_string_literal: true
2+
3+
# ActiveRecord model backed by the `chatbot_post_embeddings` table.
# Each record must reference a post, and a given post_id may appear only once.
class ::DiscourseChatbot::PostEmbedding < ActiveRecord::Base
  self.table_name = 'chatbot_post_embeddings'

  # Every embedding row must point at a post ...
  validates :post_id, presence: true
  # ... and there may be at most one row per post.
  validates :post_id, uniqueness: true
end

config/locales/server.en.yml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,95 @@ en:
4747
title: "The subject of this conversation is %{topic_title}"
4848
first_post: "The first thing someone said was %{username} who said %{raw}"
4949
post: "%{username} said %{raw}"
50+
function:
51+
calculator:
52+
description: |
53+
Useful for getting the result of a math expression. It is a general purpose calculator. It works with Ruby expressions.
54+
55+
You can retrieve the current date from it too and using the core Ruby Time method to calculate dates.
56+
57+
The input to this tool should be a valid mathematical expression that could be executed by the base Ruby programming language with no extensions.
58+
59+
Be certain to prefix any functions with 'Math.'
60+
61+
Usage:
62+
Action Input: 1 + 1
63+
Action Input: 3 * 2 / 4
64+
Action Input: 9 - 7
65+
Action Input: Time.now - 2 * 24 * 60 * 60
66+
Action Input: Math.cbrt(13) + Math.cbrt(12)
67+
Action Input: Math.sqrt(8)
68+
Action Input: (4.1 + 2.3) / (2.0 - 5.6) * 3
69+
parameters:
70+
input: the mathematical expression you need to process and get the answer to. Make sure it is Ruby compatible.
71+
error: "'%{parameter}' is an invalid mathematical expression, make sure if you are trying to calculate dates use Ruby Time class"
72+
forum_search:
73+
description: |
74+
Search the local forum for information that may help you answer the question. Especially useful when the forum specialises in the subject matter of the query.
75+
Searching the local forum is preferable to searching google or the internet and should be considered higher priority. It is quicker and cheaper.
76+
77+
Input should be a search query. You can optionally also specify the number of posts you wish returned from your query.
78+
79+
Outputs text from the Post and a url link to it you can provide the user. When presenting the url in your reply, do not embed in an anchor, just write the straight link.
80+
parameters:
81+
query: "search query for looking up information on the forum"
82+
number_of_posts: "specify the number of posts you want returned from your query"
83+
answer_summary: "The top %{number_of_posts} posts on the forum related to this query are, best match first:\n\n"
84+
answer: "Number %{rank}: the post is at this web address: %{url}, it was written by '%{username}' on %{date} and the text is '%{raw}'.\n\n"
85+
error: "'%{query}': my search for this on the forum failed."
86+
google_search:
87+
description: |
88+
A wrapper around Google Search.
89+
90+
Useful for when you need to answer questions about current events.
91+
Always one of the first options when you need to find information on internet.
92+
93+
Input should be a search query.
94+
parameters:
95+
query: "search query for looking up information on the internet"
96+
error: "%{query}: my search for this on the internet failed."
97+
news:
98+
description: |
99+
A wrapper around the News API.
100+
101+
Useful for when you need to answer questions about current events in the news, current events or affairs.
102+
103+
Input should be a search query and a date from which to search news, so if the request is about today, the search should be for today's date
104+
parameters:
105+
query: "query string for searching current news and events"
106+
start_date: "start date from which to search for news in format YYYY-MM-DD"
107+
answer: "The latest news about this is: "
108+
error: "ERROR: Had trouble retrieving the news!"
109+
stock_data:
110+
description: |
111+
An API for MarketStack stock data. You need to call it using the stock ticker. You can optionally also provide a specific date.
112+
parameters:
113+
ticker: "ticker for share or stock query"
114+
date: "date for data in format YYYY-MM-DD"
115+
answer: "Ticker %{ticker} had a day close of %{close} on %{date}, with a high of %{high} and a low of %{low}"
116+
error: "ERROR: Had trouble retrieving information from Market Stack for stock market information!"
117+
wikipedia:
118+
description: |
119+
A wrapper around Wikipedia.
120+
121+
Useful for when you need to answer general questions about
122+
people, places, companies, facts, historical events, or other subjects.
123+
124+
Input should be a search query
125+
parameters:
126+
query: "query string for wikipedia search"
127+
answer: "The relevant wikipedia page has the following summary: '%{summary}' and the article can be found at this url link: %{url}"
128+
error: "ERROR: Had trouble retrieving information from Wikipedia!"
129+
agent:
130+
handle_function_call:
131+
answer: "The answer is %{result}."
132+
call_function:
133+
error: "There was something wrong with your function arguments"
134+
final_thought_answer:
135+
opener: "To answer the question I will use these step by step instructions.\n\n"
136+
thought_declaration: "I will use the %{function_name} function to calculate the answer with arguments %{arguments}.\n\n"
137+
final_thought: "%{thoughts} Based on the above, I will now answer the question. This message will only be seen by me, so answer on the assumption that the user has not seen this message."
138+
50139
errors:
51140
general: "Sorry, I'm not well right now. Let's talk some other time. Meanwhile, please ask the admin to check the logs, thank you!"
52141
retries: "I've tried working out a response for you several times, but ultimately failed. Please contact the admin if this persists, thank you!"

config/settings.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,13 @@ plugins:
6666
default: gpt-3.5-turbo
6767
choices:
6868
- gpt-3.5-turbo
69-
- gpt-3.5-turbo-16k
7069
- gpt-3.5-turbo-0613
70+
- gpt-3.5-turbo-16k
71+
- gpt-3.5-turbo-16k-0613
7172
- gpt-4
72-
- gpt-4-32k
7373
- gpt-4-0613
74+
- gpt-4-32k
75+
- gpt-4-32k-0613
7476
chatbot_reply_job_time_delay:
7577
client: false
7678
default: 3
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# frozen_string_literal: true
2+
3+
# Enables the PostgreSQL `embedding` extension.
#
# If enabling fails and the extension is not listed in
# `pg_available_extensions`, an explanatory banner is written to STDERR
# before the original error is re-raised.
class EnableEmbeddingExtension < ActiveRecord::Migration[7.0]
  def change
    enable_extension :embedding
  rescue StandardError
    # Only StandardError is rescued: signals and exit requests should
    # propagate untouched rather than trigger a database query.
    # NOTE(review): if the migration runs inside a transaction, the failed
    # statement may leave it aborted and this follow-up query could itself
    # raise — confirm against the target setup.
    print_missing_extension_banner unless embedding_extension_available?
    raise
  end

  private

  # @return [Boolean] true when `pg_available_extensions` lists `embedding`.
  def embedding_extension_available?
    !DB.query_single("SELECT 1 FROM pg_available_extensions WHERE name = 'embedding';").empty?
  end

  # Writes installation guidance to STDERR.
  def print_missing_extension_banner
    STDERR.puts "----------------------------DISCOURSE CHATBOT ERROR----------------------------------"
    STDERR.puts " Discourse Chatbot now requires the embedding extension on the PostgreSQL database."
    STDERR.puts " Run a `./launcher rebuild app` to fix it on a standard install."
    STDERR.puts " Alternatively, you can remove Discourse Chatbot to rebuild."
    STDERR.puts "----------------------------DISCOURSE CHATBOT ERROR----------------------------------"
  end
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# frozen_string_literal: true
2+
3+
# Creates the `chatbot_embeddings` table: a non-null integer `post_id` with a
# unique index, a non-null `real[]` column named `embedding`, and timestamps.
class CreateChatbotEmbeddingsTable < ActiveRecord::Migration[7.0]
  def change
    create_table(:chatbot_embeddings) do |table|
      # NOTE(review): `foreign_key: true` is passed to a plain integer column
      # rather than to `references`; it may be ignored there — confirm whether
      # a foreign key constraint is actually created.
      table.column :post_id, :integer, null: false, index: { unique: true }, foreign_key: true
      table.column :embedding, "real[]", null: false
      table.timestamps
    end
  end
end
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# frozen_string_literal: true
2+
3+
# Adds (and on rollback removes) an `hnsw` index on
# `chatbot_embeddings.embedding`, using raw SQL.
class CreateChatbotEmbeddingsIndex < ActiveRecord::Migration[7.0]
  # Creates the index with the parameters given in the WITH clause.
  def up
    execute(<<~SQL)
      CREATE INDEX hnsw_index_on_chatbot_embeddings ON chatbot_embeddings USING hnsw(embedding)
      WITH (dims=1536, m=64, efconstruction=64, efsearch=64);
    SQL
  end

  # Drops the index created by `up`.
  def down
    execute(<<~SQL)
      DROP INDEX hnsw_index_on_chatbot_embeddings;
    SQL
  end
end
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
# frozen_string_literal: true
3+
4+
# Renames `chatbot_embeddings` to `chatbot_post_embeddings`.
class RenameChatbotEmbeddingsTable < ActiveRecord::Migration[7.0]
  def change
    # SafeMigrate is switched off only for the duration of the rename.
    Migration::SafeMigrate.disable!
    rename_table :chatbot_embeddings, :chatbot_post_embeddings
  ensure
    # Always switch it back on, even if the rename raised.
    Migration::SafeMigrate.enable!
  end
end

0 commit comments

Comments
 (0)