-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquery.py
94 lines (80 loc) · 2.89 KB
/
query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
import json
import os
from getpass import getpass
import openai
from uuid import uuid4
import sys
from dotenv import load_dotenv
# Pull credentials from the local .env file before reading the environment.
load_dotenv()

ASTRA_DB_SECURE_BUNDLE_PATH = os.environ["ASTRA_DB_SECURE_BUNDLE_PATH"]
ASTRA_DB_APPLICATION_TOKEN = os.environ["ASTRA_DB_APPLICATION_TOKEN"]
ASTRA_DB_KEYSPACE = "vector"

# Connect to Astra DB via the secure-connect bundle. The literal username
# "token" is what Astra's token-based auth scheme expects.
cluster = Cluster(
    cloud={"secure_connect_bundle": ASTRA_DB_SECURE_BUNDLE_PATH},
    auth_provider=PlainTextAuthProvider("token", ASTRA_DB_APPLICATION_TOKEN),
)
session = cluster.connect()
keyspace = ASTRA_DB_KEYSPACE

# OpenAI configuration: API key plus the model used to embed query text.
openai.api_key = os.environ["OPENAI_API_KEY"]
embedding_model_name = "text-embedding-ada-002"
def find_quote_and_author(query_quote, n):
    """Return up to ``n`` (playerline, player) pairs most similar to ``query_quote``.

    The query text is embedded with the OpenAI embeddings API, and the
    resulting vector drives an ANN search on ``vector.shakespeare_cql``.

    Args:
        query_quote: free-text query to embed and search for.
        n: maximum number of rows to return.

    Returns:
        List of ``(playerline, player)`` tuples, best matches first.
    """
    query_vector = openai.Embedding.create(
        input=[query_quote],
        engine=embedding_model_name,
    ).data[0].embedding
    # Plain (non-f) string: the %s placeholders are bound by the Cassandra
    # driver at execute() time, not by Python string formatting.
    search_statement = """SELECT playerline, player FROM vector.shakespeare_cql
        ORDER BY embedding_vector ANN OF %s
        LIMIT %s;
    """
    # For best performance, one should keep a cache of prepared statements
    # for the various possible statements used here.
    # (Remember: to prepare a statement you use '?' instead of '%s'.)
    result_rows = session.execute(search_statement, (query_vector, n))
    return [(row.playerline, row.player) for row in result_rows]
completion_model_name = "gpt-3.5-turbo"

# Prompt template for the chat completion. Placeholders: {wordcount} (target
# answer length), {topic} (the user's question), {examples} (retrieved quotes).
# Fix: the original literal opened with four quotes (""""), which made the
# prompt start with a stray '"' character; that stray quote is removed here.
generation_prompt_template = """Generate an answer to a question. Use only the information in the provided documents.
Answer with {wordcount} words. If you don't know, just say you don't know, don't try to make up an answer. Be as truthful as possible.
REFERENCE TOPIC: "{topic}"
ACTUAL EXAMPLES:
{examples}
"""
def generate_quote(topic, n=100, author=None, tags=None, wordcount=None):
    """Generate an answer about ``topic`` grounded in retrieved quotes.

    Args:
        topic: question / topic to answer.
        n: number of reference quotes to retrieve via vector search.
        author: accepted for interface compatibility; currently unused.
        tags: accepted for interface compatibility; currently unused.
        wordcount: target answer length in words. Defaults to ``sys.argv[1]``
            to preserve the original CLI-driven behavior, but callers can now
            pass it explicitly instead of relying on process arguments.

    Returns:
        The generated answer string, or ``None`` when no quotes were found.
    """
    if wordcount is None:
        # Original behavior: word count comes from the first CLI argument.
        wordcount = sys.argv[1]
    quotes = find_quote_and_author(query_quote=topic, n=n)
    if not quotes:
        print("** no quotes found.")
        return None
    prompt = generation_prompt_template.format(
        topic=topic,
        wordcount=wordcount,
        examples="\n".join(f" - {quote[0]}" for quote in quotes),
    )
    # a little logging:
    print("** quotes found:")
    for q, a in quotes:
        print(f"** - {q} ({a})")
    print("** end of logging")
    response = openai.ChatCompletion.create(
        model=completion_model_name,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=320,
    )
    # Strip double quotes the model tends to wrap answers in.
    return response.choices[0].message.content.replace('"', '').strip()
# Script entry point: the question/topic is taken from the second CLI argument.
q_topic = generate_quote(sys.argv[2])
print(f"\nAn answer to the question:\n{q_topic}")