diff --git a/README.md b/README.md
index 99b6f15..bc9148b 100644
--- a/README.md
+++ b/README.md
@@ -117,12 +117,48 @@ The CLI will prompt you to input instructions interactively:
 
 You can configure the demo by specifying the following parameters:
 
 - `--aggregator`: The primary model used for final response generation.
-- `--reference_models`: List of models used as references.
+- `--reference-models`: Models used as references.
 - `--temperature`: Controls the randomness of the response generation.
-- `--max_tokens`: Maximum number of tokens in the response.
+- `--max-tokens`: Maximum number of tokens in the response.
 - `--rounds`: Number of rounds to process the input for refinement. (num rounds == num of MoA layers - 1)
-- `--num_proc`: Number of processes to run in parallel for faster execution.
-- `--multi_turn`: Boolean to toggle multi-turn interaction capability.
+- `--num-proc`: Number of processes to run in parallel for faster execution.
+- `--multi-turn`: Boolean to toggle multi-turn interaction capability.
+
+Specify `--reference-models` multiple times to use multiple models as references. For example:
+
+```bash
+# Specify multiple reference models
+python bot.py --reference-models "mistralai/Mixtral-8x22B-Instruct-v0.1" --reference-models "Qwen/Qwen2-72B-Instruct"
+```
+
+## Other OpenAI-compatible API endpoints
+
+To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` and `OPENAI_API_KEY` environment variables, and leave `TOGETHER_API_KEY` empty so it does not take precedence:
+
+```
+export TOGETHER_API_KEY=
+export OPENAI_BASE_URL="https://your-api-provider.com/v1"
+export OPENAI_API_KEY="your-api-key-here"
+```
+
+This way, any OpenAI-compatible third-party API can be used, such as OpenRouter, Groq, or a local model server.
+
+### Ollama
+
+For example, to run the bot using Ollama:
+
+1. Set up the environment:
+
+```
+export OPENAI_BASE_URL=http://localhost:11434/v1
+export OPENAI_API_KEY=ollama
+```
+
+2. Run the bot:
+
+```
+python bot.py --aggregator llama3 --reference-models llama3 --reference-models mistral
+```
 
 ## Evaluation
diff --git a/bot.py b/bot.py
index 89f8f19..fe5b0ee 100644
--- a/bot.py
+++ b/bot.py
@@ -83,7 +83,7 @@ def process_fn(
 
 
 def main(
-    model: str = "Qwen/Qwen2-72B-Instruct",
+    aggregator: str = "Qwen/Qwen2-72B-Instruct",
     reference_models: list[str] = default_reference_models,
     temperature: float = 0.7,
     max_tokens: int = 512,
@@ -118,7 +118,7 @@ def main(
 
     model = Prompt.ask(
         "\n1. What main model do you want to use?",
-        default="Qwen/Qwen2-72B-Instruct",
+        default=aggregator,
     )
     console.print(f"Selected {model}.", style="yellow italic")
     temperature = float(
@@ -199,8 +199,9 @@ def main(
 
         for chunk in output:
            out = chunk.choices[0].delta.content
-            console.print(out, end="")
-            all_output += out
+            if out is not None:
+                console.print(out, end="")
+                all_output += out
         print()
 
         if DEBUG:
diff --git a/utils.py b/utils.py
index 4651745..7b60979 100644
--- a/utils.py
+++ b/utils.py
@@ -10,6 +10,19 @@
 
 
 DEBUG = int(os.environ.get("DEBUG", "0"))
 
+TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+EVAL_API_KEY = os.environ.get("EVAL_API_KEY")
+
+# If TOGETHER_API_KEY is set, use it for generation and keep the OpenAI key for evaluations
+if TOGETHER_API_KEY:
+    OPENAI_API_KEY = TOGETHER_API_KEY
+    EVAL_API_KEY = os.environ.get("OPENAI_API_KEY")
+
+OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.together.xyz/v1")
+EVAL_BASE_URL = os.environ.get("EVAL_BASE_URL", "https://api.openai.com/v1")
+
+
 def generate_together(
     model,
@@ -21,12 +34,12 @@
     output = None
 
+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"
+
     for sleep_time in [1, 2, 4, 8, 16, 32]:
 
         try:
 
-            endpoint = "https://api.together.xyz/v1/chat/completions"
-
             if DEBUG:
                 logger.debug(
                     f"Sending messages ({len(messages)}) (last message: `{messages[-1]['content'][:20]}...`) to `{model}`."
                 )
@@ -41,7 +54,7 @@
                     "messages": messages,
                 },
                 headers={
-                    "Authorization": f"Bearer {os.environ.get('TOGETHER_API_KEY')}",
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                 },
             )
             if "error" in res.json():
@@ -80,11 +93,10 @@ def generate_together_stream(
     max_tokens=2048,
     temperature=0.7,
 ):
-    endpoint = "https://api.together.xyz/v1"
     client = openai.OpenAI(
-        api_key=os.environ.get("TOGETHER_API_KEY"), base_url=endpoint
+        api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL
     )
-    endpoint = "https://api.together.xyz/v1/chat/completions"
+    endpoint = f"{OPENAI_BASE_URL}/chat/completions"
     response = client.chat.completions.create(
         model=model,
         messages=messages,
@@ -104,7 +116,8 @@ def generate_openai(
 ):
 
     client = openai.OpenAI(
-        api_key=os.environ.get("OPENAI_API_KEY"),
+        api_key=EVAL_API_KEY,
+        base_url=EVAL_BASE_URL,
     )
 
     for sleep_time in [1, 2, 4, 8, 16, 32]:
@@ -179,3 +192,4 @@ def generate_with_references(
         temperature=temperature,
         max_tokens=max_tokens,
     )
+
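
Note on the bot.py change above: OpenAI-compatible streaming servers emit role-only and terminal chunks whose `delta.content` is `None`, so the unguarded `all_output += out` could raise `TypeError`. A minimal standalone sketch of the same guard, assuming the Ollama setup from the README; the client construction and model name are placeholders, since in the repo the stream comes from `generate_together_stream`:

```python
import openai

# Placeholder client: in the repo this is built inside generate_together_stream
# from OPENAI_BASE_URL / OPENAI_API_KEY; the Ollama values mirror the README.
client = openai.OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

stream = client.chat.completions.create(
    model="llama3",  # placeholder model name
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
)

all_output = ""
for chunk in stream:
    out = chunk.choices[0].delta.content
    # Role-only and terminal chunks carry delta.content == None; skipping
    # them avoids the TypeError that `all_output += None` would raise.
    if out is not None:
        print(out, end="")
        all_output += out
```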
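Note on the utils.py key plumbing above: when `TOGETHER_API_KEY` is set it drives generation and the OpenAI key is handed to the evaluation client; otherwise `OPENAI_API_KEY` drives generation and `EVAL_API_KEY` drives evaluations. A minimal sketch of that precedence; `resolve_keys` is a hypothetical helper for illustration only, not part of the repo:

```python
def resolve_keys(env: dict) -> tuple:
    """Hypothetical helper mirroring the precedence added in utils.py.

    Returns (generation_key, evaluation_key) for a given environment.
    """
    together_key = env.get("TOGETHER_API_KEY")
    openai_key = env.get("OPENAI_API_KEY")
    eval_key = env.get("EVAL_API_KEY")
    if together_key:
        # The Together key drives generation; the OpenAI key is
        # reserved for the evaluation client (generate_openai).
        return together_key, openai_key
    # Otherwise the OpenAI-compatible key drives generation and
    # EVAL_API_KEY (if any) drives evaluations.
    return openai_key, eval_key


# Together setup: generation uses the Together key, evals the OpenAI key.
assert resolve_keys({"TOGETHER_API_KEY": "tk", "OPENAI_API_KEY": "ok"}) == ("tk", "ok")
# Ollama setup from the README: TOGETHER_API_KEY is left unset/empty.
assert resolve_keys({"OPENAI_API_KEY": "ollama"}) == ("ollama", None)
```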