#!/usr/bin/env python3
import os
import openai
from chromadb.config import Settings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
# OpenAI / Azure OpenAI configuration: choose the backend from environment
# variables, then build the chat model and the embeddings object accordingly.
default_model = 'gpt-4'

# Read the base URL once; it decides (together with OPENAI_API_TYPE) whether
# we are talking to Azure OpenAI or to the plain OpenAI-compatible endpoint.
_api_base = os.getenv("OPENAI_API_BASE")
_use_azure = os.getenv("OPENAI_API_TYPE") == "azure" or (
    _api_base is not None and "azure" in _api_base)

if _use_azure:
    # Azure OpenAI: configure the global client and address the model through
    # its deployment ("engine") name — Azure deployment names cannot contain
    # periods, hence the replace().
    openai.api_type = "azure"
    openai.api_base = _api_base
    openai.api_version = "2023-05-15"
    openai.api_key = os.getenv("OPENAI_API_KEY")
    llm = ChatOpenAI(
        model_name=default_model,
        temperature=0,
        model_kwargs={"engine": default_model.replace('.', '')},
    )
    embeddings = OpenAIEmbeddings(
        deployment="text-embedding-ada-002",
        model="text-embedding-ada-002",
        openai_api_type=os.getenv("OPENAI_API_TYPE"),
        openai_api_base=_api_base,
        openai_api_version="2023-05-15",
        openai_api_key=os.getenv("OPENAI_API_KEY"),
        chunk_size=1  # Only 1 is allowed for Azure OpenAI
    )
else:
    # Plain OpenAI (or OpenAI-compatible) endpoint.
    openai.api_key = os.getenv("OPENAI_API_KEY")
    openai.api_base = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
    embeddings = OpenAIEmbeddings(
        model="text-embedding-ada-002",
        openai_api_key=os.getenv("OPENAI_API_KEY"),
    )
    llm = ChatOpenAI(model=default_model, temperature=0)
def load_store():
    """Load Markdown docs from ./doc/, split them into chunks, and embed
    them into a Chroma vector store persisted under ./store.

    Returns the populated Chroma store.
    """
    loader = DirectoryLoader(
        "./doc/",
        glob="**/*.md",
        recursive=True,
        show_progress=True,
        silent_errors=True,   # skip unreadable files instead of aborting
        use_multithreading=True,
        loader_cls=TextLoader,
    )
    splitter = CharacterTextSplitter(chunk_size=4096, separator="\n")
    # Flatten every document into its text chunks before embedding.
    chunks = [
        chunk
        for doc in loader.load()
        for chunk in splitter.split_text(doc.page_content)
    ]
    return Chroma.from_texts(
        chunks,
        embeddings,
        persist_directory="./store",
        client_settings=Settings(anonymized_telemetry=False),
    )
# Script entry: embed the documents, wire up a retrieval QA chain, and run it
# once on a fixed prompt.
print('Embedding documents...')
vector_store = load_store()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=vector_store.as_retriever(),
)

print('Running chain...')
# Prompt (Chinese): "As a Linux kernel and eBPF expert, your task is to write
# a bpftrace program that traces the number of syscalls made by processes."
question = '作为一名 Linux 内核和 eBPF 的专家,你的任务是开发一个 bpftrace 程序,跟踪系统中进程的系统调用数量。'
print(qa_chain.run(question))