Add experimental feature: multi-stage search copilot command
yym68686 committed Feb 1, 2024
1 parent a12b5f6 commit 0db07ff
Showing 7 changed files with 120 additions and 75 deletions.
49 changes: 28 additions & 21 deletions bot.py
@@ -1,5 +1,7 @@
import re
import os
import sys
sys.dont_write_bytecode = True
import config
import logging
import traceback
@@ -90,9 +92,6 @@ async def command_bot(update, context, language=None, prompt=translator_prompt,
if message:
if "claude" in config.GPT_ENGINE and config.ClaudeAPI:
robot = config.claudeBot
# if not config.API or config.PLUGINS["USE_G4F"]:
# import utils.gpt4free as gpt4free
# robot = gpt4free
if image_url:
robot = config.GPT4visionbot
title = "`🤖️ gpt-4-vision-preview`\n\n"
@@ -130,10 +129,11 @@ async def reset_chat(update, context):
)

async def getChatGPT(update, context, title, robot, message, chatid, messageid):
result = title
result = ""
text = message
modifytime = 0
lastresult = ''
time_out = 600
lastresult = title

message = await context.bot.send_message(
chat_id=chatid,
@@ -151,23 +151,29 @@ async def getChatGPT(update, context, title, robot, message, chatid, messageid):
for data in get_answer(text, convo_id=str(chatid), pass_history=pass_history):
if "🌐" not in data:
result = result + data
tmpresult = result
tmpresult = title + result
modifytime = modifytime + 1
if re.sub(r"```", '', result).count("`") % 2 != 0:
tmpresult = result + "`"
tmpresult = title + result + "`"
if result.count("```") % 2 != 0:
tmpresult = result + "\n```"
tmpresult = title + result + "\n```"
if 'claude2' in title:
tmpresult = re.sub(r",", ',', tmpresult)
if "🌐" in data:
tmpresult = data
if "answer:" in result:
tmpresult = re.sub(r"thought:[\S\s]+?answer:\s", '', tmpresult)
tmpresult = re.sub(r"action:[\S\s]+?answer:\s", '', tmpresult)
tmpresult = re.sub(r"answer:\s", '', tmpresult)
tmpresult = re.sub(r"thought:[\S\s]+", '', tmpresult)
tmpresult = re.sub(r"action:[\S\s]+", '', tmpresult)
else:
tmpresult = re.sub(r"thought:[\S\s]+", '', tmpresult)
if (modifytime % 20 == 0 and lastresult != tmpresult) or "🌐" in data:
if 'claude2' in title:
tmpresult = re.sub(r",", ',', tmpresult)
if "🌐" in data:
tmpresult = data
await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(tmpresult), parse_mode='MarkdownV2', disable_web_page_preview=True)
await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(tmpresult), parse_mode='MarkdownV2', disable_web_page_preview=True, read_timeout=time_out, write_timeout=time_out, pool_timeout=time_out, connect_timeout=time_out)
lastresult = tmpresult
except Exception as e:
print('\033[31m')
print("response_msg", result)
print("error", e)
traceback.print_exc()
print('\033[0m')
if config.API:
@@ -177,12 +183,12 @@ async def getChatGPT(update, context, title, robot, message, chatid, messageid):
await context.bot.delete_message(chat_id=chatid, message_id=messageid)
messageid = ''
config.API = ''
result += f"`出错啦!{e}`"
print(result)
if lastresult != result and messageid:
tmpresult = f"`{e}`"
print(tmpresult)
if lastresult != tmpresult and messageid:
if 'claude2' in title:
result = re.sub(r",", ',', result)
await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(result), parse_mode='MarkdownV2', disable_web_page_preview=True)
tmpresult = re.sub(r",", ',', tmpresult)
await context.bot.edit_message_text(chat_id=chatid, message_id=messageid, text=escape(tmpresult), parse_mode='MarkdownV2', disable_web_page_preview=True, read_timeout=time_out, write_timeout=time_out, pool_timeout=time_out, connect_timeout=time_out)

@decorators.GroupAuthorization
@decorators.Authorization
@@ -491,10 +497,10 @@ async def post_init(application: Application) -> None:
await application.bot.set_my_commands([
BotCommand('info', 'basic information'),
BotCommand('pic', 'Generate image'),
BotCommand('copilot', 'Advanced search mode'),
BotCommand('search', 'search Google or duckduckgo'),
BotCommand('en2zh', 'translate to Chinese'),
BotCommand('zh2en', 'translate to English'),
# BotCommand('qa', 'Document Q&A with Embedding Database Search'),
BotCommand('start', 'Start the bot'),
BotCommand('reset', 'Reset the bot'),
])
@@ -520,6 +526,7 @@ async def post_init(application: Application) -> None:
application.add_handler(CommandHandler("reset", reset_chat))
application.add_handler(CommandHandler("en2zh", lambda update, context: command_bot(update, context, "Simplified Chinese", robot=config.translate_bot)))
application.add_handler(CommandHandler("zh2en", lambda update, context: command_bot(update, context, "english", robot=config.translate_bot)))
application.add_handler(CommandHandler("copilot", lambda update, context: command_bot(update, context, None, None, title=f"`🤖️ {config.GPT_ENGINE}`\n\n", robot=config.copilot_bot)))
application.add_handler(CommandHandler("info", info))
application.add_handler(InlineQueryHandler(inlinequery))
# application.add_handler(CommandHandler("qa", qa))
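The new /copilot replies stream back with a ReAct-style scratchpad (thought: / action: / answer:), and the regexes added to getChatGPT above strip that scratchpad so only the answer text is shown while streaming. A minimal standalone trace of that stripping, using a made-up streamed string (not taken from the repository):

import re

def strip_scratchpad(text: str) -> str:
    # Sketch only: mirrors the answer-extraction regexes added to getChatGPT.
    if "answer:" in text:
        text = re.sub(r"thought:[\S\s]+?answer:\s", '', text)
        text = re.sub(r"action:[\S\s]+?answer:\s", '', text)
        text = re.sub(r"answer:\s", '', text)
        text = re.sub(r"thought:[\S\s]+", '', text)
        text = re.sub(r"action:[\S\s]+", '', text)
    else:
        # While the answer has not started yet, hide the scratchpad entirely.
        text = re.sub(r"thought:[\S\s]+", '', text)
    return text

# Hypothetical partial stream, not taken from the repository:
streamed = "thought: need current data\naction: get_search_results\nanswer: The latest stable release is 1.2.3."
print(strip_scratchpad(streamed))  # -> The latest stable release is 1.2.3.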
4 changes: 3 additions & 1 deletion config.py
@@ -1,6 +1,7 @@
import os
from dotenv import load_dotenv
load_dotenv()
import utils.prompt as prompt

WEB_HOOK = os.environ.get('WEB_HOOK', None)
BOT_TOKEN = os.environ.get('BOT_TOKEN', None)
@@ -22,7 +23,7 @@
from datetime import datetime
current_date = datetime.now()
Current_Date = current_date.strftime("%Y-%m-%d")
systemprompt = os.environ.get('SYSTEMPROMPT', f"You are ChatGPT, a large language model trained by OpenAI. Respond conversationally in {LANGUAGE}. Knowledge cutoff: 2023-04. Current date: [ {Current_Date} ]")
systemprompt = os.environ.get('SYSTEMPROMPT', prompt.system_prompt.format(LANGUAGE, Current_Date))

from utils.chatgpt2api import Chatbot as GPT
from utils.chatgpt2api import Imagebot, claudebot
@@ -37,6 +38,7 @@
except:
print("无法使用 gpt-4-vision-preview 模型")
translate_bot = GPT(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=systemprompt, temperature=temperature)
copilot_bot = GPT(api_key=f"{API}", engine=GPT_ENGINE, system_prompt=prompt.search_system_prompt.format(LANGUAGE), temperature=temperature)
dallbot = Imagebot(api_key=f"{API}")
else:
ChatGPTbot = None
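config.py now builds both system prompts from a new utils/prompt.py module, one of the files whose diff did not load on this page. Its exact contents are not shown here; a plausible minimal shape, inferred only from the two .format(...) call sites above, might be:

# utils/prompt.py -- hypothetical sketch; the real file is not part of the loaded diff.
# config.py formats system_prompt with (LANGUAGE, Current_Date) and
# search_system_prompt with (LANGUAGE,), so both are assumed to be plain templates.
system_prompt = (
    "You are ChatGPT, a large language model trained by OpenAI. "
    "Respond conversationally in {}. Knowledge cutoff: 2023-04. "
    "Current date: [ {} ]"
)
search_system_prompt = (
    "You are a search assistant. Think step by step, decide whether to call "
    "the search tools, and answer the user's question in {}."
)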
6 changes: 3 additions & 3 deletions test/test_Web_crawler.py
@@ -80,7 +80,7 @@ def Web_crawler(url: str, isSearch=False) -> str:
print("Skipping large file:", url)
return result
soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')

table_contents = ""
tables = soup.find_all('table')
for table in tables:
@@ -133,7 +133,8 @@ def Web_crawler(url: str, isSearch=False) -> str:
# for url in ['https://s.weibo.com/top/summary?cate=realtimehot']:
# for url in ['https://tophub.today/n/KqndgxeLl9']:
# for url in ['https://support.apple.com/zh-cn/HT213931']:
for url in ['https://www.usnews.com/news/entertainment/articles/2023-12-22/china-drafts-new-rules-proposing-restrictions-on-online-gaming']:
for url in ['https://finance.sina.com.cn/stock/roll/2023-06-26/doc-imyyrexk4053724.shtml', 'https://s.weibo.com/top/summary?cate=realtimehot', 'https://tophub.today/n/KqndgxeLl9', 'https://www.whatsonweibo.com/', 'https://www.trendingonweibo.com/?ref=producthunt', 'https://www.trendingonweibo.com/', 'https://www.statista.com/statistics/1377073/china-most-popular-news-on-weibo/']:
# for url in ['https://www.usnews.com/news/entertainment/articles/2023-12-22/china-drafts-new-rules-proposing-restrictions-on-online-gaming']:
# for url in ['https://developer.aliyun.com/article/721836']:
# for url in ['https://cn.aliyun.com/page-source/price/detail/machinelearning_price']:
# for url in ['https://mp.weixin.qq.com/s/Itad7Y-QBcr991JkF3SrIg']:
@@ -148,4 +149,3 @@ def Web_crawler(url: str, isSearch=False) -> str:
run_time = end_time - start_time
# 打印运行时间
print(f"程序运行时间:{run_time}秒")

9 changes: 9 additions & 0 deletions test/test_re_agent.py
@@ -0,0 +1,9 @@
import re
matches = re.finditer(r"answer: (.*)", test_str, re.MULTILINE)
result = []
for matchNum, match in enumerate(matches, start=1):
for groupNum in range(0, len(match.groups())):
groupNum = groupNum + 1
result.append(match.group(groupNum))

print("\n\n".join(result))
63 changes: 26 additions & 37 deletions utils/chatgpt2api.py
@@ -254,7 +254,7 @@ def __init__(
api_key: str,
engine: str = os.environ.get("GPT_ENGINE") or "gpt-3.5-turbo",
proxy: str = None,
timeout: float = None,
timeout: float = 600,
max_tokens: int = None,
temperature: float = 0.5,
top_p: float = 1.0,
@@ -285,7 +285,7 @@ def __init__(
)
# context max tokens
self.truncate_limit: int = truncate_limit or (
16000
32000
# 126500 Control the number of search characters to prevent excessive spending
if "gpt-4-1106-preview" in engine or "gpt-4-0125-preview" in engine or "gpt-4-turbo-preview" in engine or self.engine == "gpt-4-vision-preview"
else 30500
@@ -337,7 +337,7 @@ def __init__(
],
}
self.function_calls_counter = {}
self.function_call_max_loop = 3
self.function_call_max_loop = 10
# self.encode_web_text_list = []

if self.get_token_count("default") > self.max_tokens:
@@ -362,7 +362,7 @@ def add_to_conversation(
else:
print('\033[31m')
print("error: add_to_conversation message is None or empty")
print(self.conversation[convo_id])
print("role", role, "function_name", function_name, "message", message)
print('\033[0m')

def __truncate_conversation(self, convo_id: str = "default") -> None:
@@ -593,6 +593,7 @@ def ask_stream(
)
response_role: str or None = None
full_response: str = ""
function_full_response: str = ""
function_call_name: str = ""
need_function_call: bool = False
for line in response.iter_lines():
@@ -609,6 +610,7 @@
if line == "[DONE]":
break
resp: dict = json.loads(line)
# print("resp", resp)
choices = resp.get("choices")
if not choices:
continue
@@ -627,12 +629,12 @@
function_call_content = delta["function_call"]["arguments"]
if "name" in delta["function_call"]:
function_call_name = delta["function_call"]["name"]
full_response += function_call_content
if full_response.count("\\n") > 2 or "}" in full_response:
function_full_response += function_call_content
if function_full_response.count("\\n") > 2 or "}" in function_full_response:
break
if need_function_call:
full_response = check_json(full_response)
print("full_response", full_response)
function_full_response = check_json(function_full_response)
print("function_full_response", function_full_response)
if not self.function_calls_counter.get(function_call_name):
self.function_calls_counter[function_call_name] = 1
else:
@@ -641,51 +643,31 @@
function_call_max_tokens = self.truncate_limit - message_token["total"] - 1000
if function_call_max_tokens <= 0:
function_call_max_tokens = int(self.truncate_limit / 2)
print("function_call_max_tokens", function_call_max_tokens)
print("\033[32m function_call", function_call_name, "max token:", function_call_max_tokens, "\033[0m")
if function_call_name == "get_search_results":
# g4t 提取的 prompt 有问题
# prompt = json.loads(full_response)["prompt"]
for index in range(len(self.conversation[convo_id])):
if self.conversation[convo_id][-1 - index]["role"] == "user":
self.conversation[convo_id][-1 - index]["content"][0]["text"] = self.conversation[convo_id][-1 - index]["content"][0]["text"].replace("search: ", "")
prompt = self.conversation[convo_id][-1 - index]["content"][0]["text"]
if json.loads(full_response)["prompt"].strip() != prompt:
prompt = " ".join([prompt, json.loads(full_response)["prompt"].strip()]).strip()
print("\n\nprompt", prompt)
break
tiktoken.get_encoding("cl100k_base")
encoding = tiktoken.encoding_for_model(config.GPT_ENGINE)
web_result = yield from get_url_text_list(prompt)

encode_web_text_list = encoding.encode(" ".join(web_result))
print("search len", len(encode_web_text_list))
function_response = encoding.decode(encode_web_text_list[:function_call_max_tokens])
# if self.encode_web_text_list == []:
# self.encode_web_text_list = encoding.encode(" ".join(get_url_text_list(prompt)))
# print("search len", len(self.encode_web_text_list))
# function_response = encoding.decode(self.encode_web_text_list[:function_call_max_tokens])
# self.encode_web_text_list = self.encode_web_text_list[function_call_max_tokens:]

# function_response = eval(function_call_name)(prompt, function_call_max_tokens)
prompt = json.loads(function_full_response)["prompt"]
function_response = eval(function_call_name)(prompt)
function_response, text_len = cut_message(function_response, function_call_max_tokens)
function_response = (
f"You need to response the following question: {prompt}. Search results is provided inside <Search_results></Search_results> XML tags. Your task is to think about the question step by step and then answer the above question in {config.LANGUAGE} based on the Search results provided. Please response in {config.LANGUAGE} and adopt a style that is logical, in-depth, and detailed. Note: In order to make the answer appear highly professional, you should be an expert in textual analysis, aiming to make the answer precise and comprehensive. Directly response markdown format, without using markdown code blocks"
"Here is the Search results, inside <Search_results></Search_results> XML tags:"
"<Search_results>"
"{}"
"</Search_results>"
).format(function_response)
user_prompt = f"You need to response the following question: {prompt}. Search results is provided inside <Search_results></Search_results> XML tags. Your task is to think about the question step by step and then answer the above question in {config.LANGUAGE} based on the Search results provided. Please response in {config.LANGUAGE} and adopt a style that is logical, in-depth, and detailed. Note: In order to make the answer appear highly professional, you should be an expert in textual analysis, aiming to make the answer precise and comprehensive. Directly response markdown format, without using markdown code blocks"
self.add_to_conversation(user_prompt, "user", convo_id=convo_id)
# user_prompt = f"You need to response the following question: {prompt}. Search results is provided inside <Search_results></Search_results> XML tags. Your task is to think about the question step by step and then answer the above question in {config.LANGUAGE} based on the Search results provided. Please response in {config.LANGUAGE} and adopt a style that is logical, in-depth, and detailed. Note: In order to make the answer appear highly professional, you should be an expert in textual analysis, aiming to make the answer precise and comprehensive. Directly response markdown format, without using markdown code blocks"
# self.add_to_conversation(user_prompt, "user", convo_id=convo_id)
if function_call_name == "get_url_content":
url = json.loads(full_response)["url"]
url = json.loads(function_full_response)["url"]
print("\n\nurl", url)
function_response = Web_crawler(url)
function_response, text_len = cut_message(function_response, function_call_max_tokens)
function_response = (
"Here is the documentation, inside <document></document> XML tags:"
"<document>"
"{}"
"</document>"
).format(function_response)
function_response, text_len = cut_message(function_response, function_call_max_tokens)
if function_call_name == "get_date_time_weekday":
function_response = eval(function_call_name)()
function_response, text_len = cut_message(function_response, function_call_max_tokens)
@@ -695,8 +677,15 @@ def ask_stream(
else:
function_response = "抱歉,直接告诉用户,无法找到相关信息"
response_role = "function"
# print(self.conversation[convo_id][-1])
if self.conversation[convo_id][-1]["role"] == "function" and self.conversation[convo_id][-1]["name"] == "get_search_results":
mess = self.conversation[convo_id].pop(-1)
# print("Truncate message:", mess)
self.add_to_conversation(full_response, "assistant", convo_id=convo_id)
yield from self.ask_stream(function_response, response_role, convo_id=convo_id, function_name=function_call_name)
else:
if self.conversation[convo_id][-1]["role"] == "function" and self.conversation[convo_id][-1]["name"] == "get_search_results":
mess = self.conversation[convo_id].pop(-1)
self.add_to_conversation(full_response, response_role, convo_id=convo_id)
self.function_calls_counter = {}
# self.clear_function_call(convo_id=convo_id)
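The new code path calls cut_message(...) to fit search results and crawled pages into the remaining token budget, but that helper's definition is not part of this diff. A minimal sketch of what it might look like, assuming tiktoken-based counting and the (text, token_count) return shape implied by the call sites:

import tiktoken

def cut_message(message: str, max_tokens: int, model: str = "gpt-3.5-turbo"):
    # Hypothetical sketch: trim message to at most max_tokens tokens and return
    # the trimmed text together with its token count, matching the
    # `function_response, text_len = cut_message(...)` call sites above.
    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(message)[:max_tokens]
    return encoding.decode(tokens), len(tokens)

# Example: keep only the first 50 tokens of a long crawled page.
trimmed, text_len = cut_message("very long search result " * 200, 50)
print(text_len)  # 50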
(Diffs for the remaining two changed files did not load and are not shown here.)
