"""Answer a question from the repository documentation.

This script is intended to be used as a GitHub Action step. It loads the
FAISS vector store committed under ``.github/vector_store``, retrieves the
most relevant documents for the question, asks the chat model to answer from
that context only, and prints the answer base64-encoded on stdout.

Inputs:
    OPENAI_API_KEY  (environment, required) key used by the OpenAI clients.
    QUESTION        (environment, optional) the question to answer.
    --query         (command line, optional) overrides ``QUESTION``.

Output:
    A single base64 line on stdout. Nothing else may be written to stdout,
    because the calling workflow captures it with ``$(python ...)``.

02/25/2024, created by Benhao Huang
"""

import argparse
import base64
import os
from pathlib import Path
from typing import Mapping, Optional, Sequence

# Question used when neither --query nor QUESTION provides any text.
DEFAULT_QUESTION = "say: Please provide something."

# Chat model that writes the answer.
MODEL_NAME = "gpt-4-0125-preview"

# Base name of the ``.faiss`` / ``.pkl`` pair inside the vector store folder.
INDEX_NAME = "my_vector_index"

# Prompt sent to the model. The wording is runtime text and is kept exactly
# as it was; it is written with explicit "\n" so the trailing space on the
# first line is visible instead of hiding at the end of a source line.
PROMPT_TEMPLATE = (
    "You should answer following problem strictly follow the documents "
    "provided. \n"
    "Answer the following question based only on the provided context in a "
    "format which is suitable for Github comment:\n"
    "\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "\n"
    "Question: {input}"
)


def require_api_key(env: Optional[Mapping[str, str]] = None) -> str:
    """Return the OpenAI API key, failing with a clear message if missing.

    Args:
        env: Mapping to read from; defaults to ``os.environ``.

    Returns:
        The non-empty value of ``OPENAI_API_KEY``.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    env = os.environ if env is None else env
    key = env.get("OPENAI_API_KEY")
    if not key:
        # Previously the value was written back into os.environ, which
        # raised "TypeError: str expected, not NoneType" when it was unset.
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it (for example from the "
            "repository secrets) before running this script."
        )
    return key


def resolve_question(
    cli_query: Optional[str] = None,
    env: Optional[Mapping[str, str]] = None,
) -> str:
    """Pick the question to ask: ``--query``, then ``QUESTION``, then default.

    Empty or whitespace-only values are treated as absent, so an exported but
    empty ``QUESTION`` still yields ``DEFAULT_QUESTION``.

    Args:
        cli_query: Value of the ``--query`` option, or ``None``.
        env: Mapping to read ``QUESTION`` from; defaults to ``os.environ``.

    Returns:
        The stripped question text.
    """
    env = os.environ if env is None else env
    for candidate in (cli_query, env.get("QUESTION")):
        if candidate and candidate.strip():
            return candidate.strip()
    return DEFAULT_QUESTION


def vector_store_dir(script_path: Optional[str] = None) -> Path:
    """Return ``<repo root>/.github/vector_store``.

    The repository root is three levels above this script
    (``<root>/.github/scripts/<script>``).

    Args:
        script_path: Path of the script; defaults to this file.
    """
    script = Path(__file__ if script_path is None else script_path).resolve()
    return script.parents[2] / ".github" / "vector_store"


def encode_answer(answer: str) -> str:
    """Return *answer* as base64 text (UTF-8 encoded before encoding)."""
    return base64.b64encode(answer.encode("utf-8")).decode("utf-8")


def build_retrieval_chain(store_dir: Optional[Path] = None):
    """Build the retrieval chain over the committed FAISS index.

    Args:
        store_dir: Folder holding the index; defaults to
            ``vector_store_dir()``.

    Returns:
        The chain returned by ``create_retrieval_chain``; invoke it with
        ``{"input": question}`` and read the ``"answer"`` key.
    """
    # Imported here so the configuration checks in main() run before the
    # heavy third-party packages are loaded, and so the pure helpers above
    # can be imported without them.
    from langchain.chains import create_retrieval_chain
    from langchain.chains.combine_documents import (
        create_stuff_documents_chain,
    )
    from langchain_community.vectorstores import FAISS
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI, OpenAIEmbeddings

    folder = vector_store_dir() if store_dir is None else Path(store_dir)

    embeddings = OpenAIEmbeddings()
    llm = ChatOpenAI(model=MODEL_NAME)

    # NOTE(review): load_local unpickles "<INDEX_NAME>.pkl". The file is
    # committed to this repository, so it is only as trustworthy as the
    # repository itself. Newer langchain_community releases appear to
    # require an explicit allow_dangerous_deserialization=True here --
    # confirm against the version that requirements.txt resolves to.
    vector = FAISS.load_local(str(folder), embeddings, index_name=INDEX_NAME)

    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    document_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(vector.as_retriever(), document_chain)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the question-answering pipeline and print the encoded answer.

    Args:
        argv: Command-line arguments; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit status (0 on success).

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not configured.
    """
    parser = argparse.ArgumentParser(
        description="Answer a question from the repository documentation."
    )
    parser.add_argument("--query", help="your question")
    args = parser.parse_args(argv)

    # Fail fast, before any model or index is loaded.
    require_api_key()
    question = resolve_question(args.query)

    chain = build_retrieval_chain()
    answer = chain.invoke({"input": question})["answer"]

    # Only the encoded answer goes to stdout; the workflow captures it.
    print(encode_answer(answer))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
hashFiles('**/.github/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + pip install -r ./.github/requirements.txt + + - name: Extract Question + id: get-question + run: | + if [ "${{ github.event_name }}" == "issue_comment" ]; then + QUESTION=$(echo "${{ github.event.comment.body }}" | grep -oP '(?<=@bot ).*') + else + QUESTION=$(echo "${{ github.event.issue.body }}" | grep -oP '(?<=@bot ).*') + fi + echo "question=$QUESTION" >> $GITHUB_OUTPUT + + - name: Run Python script + id: run-script + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + QUESTION: ${{ steps.get-question.outputs.question }} + run: | + ANSWER=$(python .github/scripts/document_QA_script.py) + echo "encoded_answer=$ANSWER" >> $GITHUB_ENV + + - name: Decode and create comment + id: decode-answer + env: + ENCODED_ANSWER: ${{ env.encoded_answer }} + run: | + echo "Decoding..." + EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64) + DECODED_ANSWER=$(echo $ENCODED_ANSWER | base64 --decode) + echo "answer<<$EOF" >> $GITHUB_OUTPUT + echo "$DECODED_ANSWER" >> $GITHUB_OUTPUT + echo "$EOF" >> $GITHUB_OUTPUT + + - name: Create Comment with Answer + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + Here is the answer to your question: + ${{ steps.decode-answer.outputs.answer }} + reactions: "+1" + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.lh/.github/workflows/process_question.yml.json b/.lh/.github/workflows/process_question.yml.json new file mode 100644 index 00000000..75462c11 --- /dev/null +++ b/.lh/.github/workflows/process_question.yml.json @@ -0,0 +1,18 @@ +{ + "sourceFile": ".github/workflows/process_question.yml", + "activeCommit": 0, + "commits": [ + { + "activePatchIndex": 0, + "patches": [ + { + "date": 1708923888992, + "content": "Index: \n===================================================================\n--- \n+++ \n" + } + ], + "date": 1708923888992, + 
"name": "Commit-0", + "content": "name: Process Question in Issue Comment\non:\n issue_comment:\n types: [created]\n issues:\n types: [opened]\n\njobs:\n process-question:\n if: >\n github.event_name == 'issue_comment' && contains(github.event.comment.body, '@bot') ||\n github.event_name == 'issues' && contains(github.event.issue.body, '@bot')\n runs-on: ubuntu-latest\n steps:\n - name: Checkout code\n uses: actions/checkout@v2\n\n - name: Set up Python 3.9\n uses: actions/setup-python@v2\n with:\n python-version: \"3.9\"\n\n - name: Cache Python dependencies\n uses: actions/cache@v2\n with:\n path: |\n ~/.cache/pip\n !~/.cache/pip/log\n key: ${{ runner.os }}-pip-${{ hashFiles('**/.github/requirements.txt') }}\n restore-keys: |\n ${{ runner.os }}-pip-\n\n - name: Install dependencies\n run: |\n pip install -r ./.github/requirements.txt\n\n - name: Extract Question\n id: get-question\n run: |\n if [ \"${{ github.event_name }}\" == \"issue_comment\" ]; then\n QUESTION=$(echo \"${{ github.event.comment.body }}\" | grep -oP '(?<=@bot ).*')\n else\n QUESTION=$(echo \"${{ github.event.issue.body }}\" | grep -oP '(?<=@bot ).*')\n fi\n echo \"question=$QUESTION\" >> $GITHUB_OUTPUT\n\n - name: Run Python script\n id: run-script\n env:\n OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}\n QUESTION: ${{ steps.get-question.outputs.question }}\n run: |\n ANSWER=$(python .github/scripts/document_QA_script.py)\n echo \"encoded_answer=$ANSWER\" >> $GITHUB_ENV\n\n - name: Decode and create comment\n id: decode-answer\n env:\n ENCODED_ANSWER: ${{ env.encoded_answer }}\n run: |\n echo \"Decoding...\"\n EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)\n DECODED_ANSWER=$(echo $ENCODED_ANSWER | base64 --decode)\n echo \"answer<<$EOF\" >> $GITHUB_OUTPUT\n echo \"$DECODED_ANSWER\" >> $GITHUB_OUTPUT\n echo \"$EOF\" >> $GITHUB_OUTPUT\n\n - name: Create Comment with Answer\n uses: peter-evans/create-or-update-comment@v4\n with:\n issue-number: ${{ github.event.issue.number }}\n body: 
|\n Here is the answer to your question:\n ${{ steps.decode-answer.outputs.answer }}\n reactions: \"+1\"\n token: ${{ secrets.GITHUB_TOKEN }}\n" + } + ] +} \ No newline at end of file diff --git a/.lh/.lhignore b/.lh/.lhignore new file mode 100644 index 00000000..1de51008 --- /dev/null +++ b/.lh/.lhignore @@ -0,0 +1,6 @@ +# list file to not track by the local-history extension. comment line starts with a '#' character +# each line describe a regular expression pattern (search for 'Javascript regex') +# it will relate to the workspace directory root. for example: +# '.*\.txt' ignores any file with 'txt' extension +# '/test/.*' ignores all the files under the 'test' directory +# '.*/test/.*' ignores all the files under any 'test' directory (even under sub-folders) diff --git a/.lh/requirements.txt.json b/.lh/requirements.txt.json new file mode 100644 index 00000000..c10681ca --- /dev/null +++ b/.lh/requirements.txt.json @@ -0,0 +1,18 @@ +{ + "sourceFile": "requirements.txt", + "activeCommit": 0, + "commits": [ + { + "activePatchIndex": 0, + "patches": [ + { + "date": 1708923787478, + "content": "Index: \n===================================================================\n--- \n+++ \n" + } + ], + "date": 1708923787478, + "name": "Commit-0", + "content": "numpy>=1.19\nscipy>=1.5\ntorch>=1.10\ndgl>=0.6" + } + ] +} \ No newline at end of file