lotr.py
# Import modules
import os
from dotenv import load_dotenv
import chromadb
from langchain.retrievers import (
    ContextualCompressionRetriever,
    MergerRetriever,
)
from langchain.retrievers.document_compressors.base import DocumentCompressorPipeline
from langchain_chroma import Chroma
from langchain_community.document_transformers import (
    EmbeddingsClusteringFilter,
    EmbeddingsRedundantFilter,
    LongContextReorder,
)
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain.chains import RetrievalQA
# The tokenizers library's warnings about forking processes and parallelism can be resolved
# by setting an environment variable that disables tokenizer parallelism
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Load environment variables
load_dotenv()
# Get the OpenAI API key
openai_api_key = os.environ["OPENAI_API_KEY"]
# Load a PDF file
loader = PyPDFLoader("chatgpt_book.pdf")
docs = loader.load_and_split()
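# Note: load_and_split() chunks the PDF with LangChain's default text splitter.
# A commented-out sketch of passing an explicit splitter instead (the chunk sizes
# below are illustrative, not part of the original script):
# from langchain_text_splitters import RecursiveCharacterTextSplitter
# splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
# docs = loader.load_and_split(text_splitter=splitter)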
# Set up 3 different embedding models from HuggingFace and OpenAI
huggingface_embeddings1 = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
huggingface_embeddings2 = HuggingFaceEmbeddings(model_name="multi-qa-MiniLM-L6-dot-v1")
openai_embeddings = OpenAIEmbeddings()
# Initialize Chroma with the documents and the first HuggingFace embedding
# (each store gets its own collection name so the two embedding spaces don't end up
# mixed together in Chroma's default collection; the names themselves are arbitrary)
vector_db1 = Chroma.from_documents(
    documents=docs,
    embedding=huggingface_embeddings1,
    collection_name="minilm_l6",
    persist_directory="./db",
)
# Initialize Chroma with the documents and the second HuggingFace embedding
vector_db2 = Chroma.from_documents(
    documents=docs,
    embedding=huggingface_embeddings2,
    collection_name="multi_qa_minilm",
    persist_directory="./db",
)
# Define 2 different retrievers with 2 unique embeddings and search types
retriever_huggingface1 = vector_db1.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)
retriever_huggingface2 = vector_db2.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)
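# ("similarity" ranks purely by vector closeness, while "mmr" trades some relevance
# for diversity among the returned documents)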
# The Lord of the Retrievers (LOTR) will hold the output of both retrievers and can be used as any other
# retriever on different types of chains
lotr = MergerRetriever(retrievers=[retriever_huggingface1, retriever_huggingface2])
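# The merged retriever can also be queried on its own, like any other retriever.
# A commented-out sketch (the query string is just an illustrative example):
# merged_docs = lotr.invoke("What is prompt engineering?")
# print(len(merged_docs))  # typically 10 documents, interleaved 5 from each retriever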
## Option 1: Remove redundant results from the merged retrievers ##
# We can remove the redundant results from both retrievers using yet another embedding.
# Note: Using multiple embeddings in different steps could help reduce biases.
# filter = EmbeddingsRedundantFilter(embeddings=openai_embeddings)
# Note: No matter the architecture of your model, there is a substantial performance
# degradation when you include 10+ retrieved documents.
# In brief: when models must access relevant information in the middle of long contexts,
# they tend to ignore the provided documents.
# You can use an additional document transformer to reorder documents after removing redundancy.
# reordering = LongContextReorder()
# pipeline = DocumentCompressorPipeline(transformers=[filter, reordering])
# compression_retriever = ContextualCompressionRetriever(
#     base_compressor=pipeline, base_retriever=lotr
# )
## Option 2: Pick a representative sample of documents from the merged retrievers ##
# This filter will divide the document vectors into clusters or "centers" of meaning,
# then pick the document closest to each center for the final results.
# By default the resulting documents will be ordered/grouped by cluster.
filter_ordered_by_retriever = EmbeddingsClusteringFilter(
    embeddings=openai_embeddings,
    num_clusters=10,
    num_closest=1,
    # If you want the final documents to be ordered by the original retriever scores,
    # add the "sorted" parameter
    sorted=True,
)
reordering = LongContextReorder()
pipeline = DocumentCompressorPipeline(transformers=[filter_ordered_by_retriever, reordering])
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline, base_retriever=lotr
)
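# Optional sanity check, sketched here as commented-out code (the query string is only
# an illustrative example): invoking the compressed retriever directly should return
# roughly one representative document per cluster, reordered for long-context use.
# sample_docs = compression_retriever.invoke("prompt engineering principles")
# for doc in sample_docs:
#     print(doc.metadata.get("page"), doc.page_content[:80])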
# Initialize the LLM for retrieval-augmented QA
llm = ChatOpenAI(model_name="gpt-4", temperature=0)
# Instantiate the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever,
)
# Run the QA
query ="What are the ten principles for prompt engineering?"
response = qa_chain.invoke(query)
print(response)
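# RetrievalQA returns a dict (with "query" and "result" keys by default), so the answer
# text alone could be printed with, e.g.:
# print(response["result"])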