From 33bddff778cd3af258c90ca32c4ed2d1f671699b Mon Sep 17 00:00:00 2001 From: Daniele Berardi Date: Wed, 12 Mar 2025 05:09:08 +0100 Subject: [PATCH 1/4] Dynamically assert model temperature value in argparser --- llm_toolkit/models.py | 2 +- run_all_experiments.py | 3 +++ run_one_experiment.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index 5a81c28dd0..e1fc2ef934 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -46,7 +46,7 @@ # Model hyper-parameters. MAX_TOKENS: int = 2000 NUM_SAMPLES: int = 1 -TEMPERATURE: float = 0.4 +TEMPERATURE: float = 1.0 class LLM: diff --git a/run_all_experiments.py b/run_all_experiments.py index 24bb0aff55..01eb65c863 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -279,6 +279,9 @@ def parse_args() -> argparse.Namespace: if args.temperature: assert 2 >= args.temperature >= 0, '--temperature must be within 0 and 2.' + + if args.temperature == TEMPERATURE and args.model in models.LLM.all_llm_names(): + args.temperature = run_one_experiment.get_model_temperature(args) benchmark_yaml = args.benchmark_yaml if benchmark_yaml: diff --git a/run_one_experiment.py b/run_one_experiment.py index 1588b2ee1c..9237d90e0a 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -54,7 +54,7 @@ NUM_SAMPLES = 2 MAX_TOKENS: int = 4096 RUN_TIMEOUT: int = 30 -TEMPERATURE: float = 0.4 +TEMPERATURE: float = 1.0 RESULTS_DIR = './results' @@ -311,3 +311,30 @@ def run(benchmark: Benchmark, model: models.LLM, args: argparse.Namespace, return AggregatedResult.from_benchmark_result( _fuzzing_pipelines(benchmark, model, args, work_dirs)) + + +def get_model_temperature(args: argparse.Namespace) -> float: + """Retrieves model temperature default value.""" + default_temperatures = {models.VertexAICodeBisonModel.name: 0.2, + models.VertexAICodeBison32KModel.name: 0.2, + models.GeminiPro.name: 0.9, + models.GeminiUltra.name: 0.2, + models.GeminiExperimental.name: 1.0, + models.GeminiV1D5.name: 1.0, + models.GeminiV2Flash.name: 1.0, + models.GeminiV2.name: 1.0, + models.GeminiV2Think.name: 0.7, + models.ClaudeHaikuV3.name: 0.5, + models.ClaudeOpusV3.name: 0.5, + models.ClaudeSonnetV3D5.name: 0.5, + models.GPT.name: 1.0, + models.GPT4.name: 1.0, + models.GPT4o.name: 1.0, + models.GPT4oMini.name: 1.0, + models.GPT4Turbo.name: 1.0} + if args.model.endswith('-chat') or args.model.endswith('-azure'): + model_name = '-'.join(args.model.split('-')[:-1]) + else: + model_name = args.model + + return default_temperatures[model_name] \ No newline at end of file From f8f198c00385006d24ed473ee5d63616a8847d52 Mon Sep 17 00:00:00 2001 From: Daniele Berardi Date: Fri, 14 Mar 2025 22:11:06 +0100 Subject: [PATCH 2/4] Dynamic assertion of temperature range based on model name --- llm_toolkit/models.py | 20 +++++++++++++++++++- run_all_experiments.py | 8 ++++---- run_one_experiment.py | 29 +---------------------------- 3 files changed, 24 insertions(+), 33 deletions(-) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index e1fc2ef934..0ae15c5f69 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -46,7 +46,7 @@ # Model hyper-parameters. MAX_TOKENS: int = 2000 NUM_SAMPLES: int = 1 -TEMPERATURE: float = 1.0 +TEMPERATURE: float = 0.4 class LLM: @@ -61,6 +61,8 @@ class LLM: _max_attempts = 5 # Maximum number of attempts to get prediction response + temperature_range: list[float] = [0.0, 2.0] # Default model temperature range + def __init__( self, ai_binary: str, @@ -125,6 +127,16 @@ def all_llm_names(cls): if hasattr(subcls, 'name') and subcls.name != AIBinaryModel.name: names.append(subcls.name) return names + + @classmethod + def all_llm_temperature_ranges(cls): + """Returns the current model and all child temperature ranges.""" + ranges = {} + for subcls in cls.all_llm_subclasses(): + if (hasattr(subcls, 'temperature_range') and hasattr(subcls, 'name') + and subcls.name != AIBinaryModel.name): + ranges[subcls.name] = subcls.temperature_range + return ranges @abstractmethod def estimate_token_num(self, text) -> int: @@ -384,6 +396,7 @@ class Claude(LLM): _max_output_tokens = 4096 _vertex_ai_model = '' context_window = 200000 + temperature_range = [0.0, 1.0] # ================================ Prompt ================================ # def estimate_token_num(self, text) -> int: @@ -548,6 +561,7 @@ class VertexAIModel(GoogleModel): _vertex_ai_model = '' _max_output_tokens = 2048 + temperature_range = [0.0, 1.0] def cloud_setup(self): """Sets Vertex AI cloud location.""" @@ -603,6 +617,8 @@ def ask_llm(self, prompt: prompts.Prompt) -> str: class GeminiModel(VertexAIModel): """Gemini models.""" + temperature_range = [0.0, 2.0] + safety_config = [ generative_models.SafetySetting( category=generative_models.HarmCategory. @@ -660,6 +676,7 @@ class GeminiPro(GeminiModel): name = 'vertex_ai_gemini-pro' _vertex_ai_model = 'gemini-1.0-pro' + temperature_range = [0.0, 1.0] class GeminiUltra(GeminiModel): @@ -670,6 +687,7 @@ class GeminiUltra(GeminiModel): name = 'vertex_ai_gemini-ultra' _vertex_ai_model = 'gemini-ultra' + temperature_range = [0.0, 1.0] class GeminiExperimental(GeminiModel): diff --git a/run_all_experiments.py b/run_all_experiments.py index 01eb65c863..4eb4ac35f4 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -278,10 +278,10 @@ def parse_args() -> argparse.Namespace: assert args.num_samples > 0, '--num-samples must take a positive integer.' if args.temperature: - assert 2 >= args.temperature >= 0, '--temperature must be within 0 and 2.' - - if args.temperature == TEMPERATURE and args.model in models.LLM.all_llm_names(): - args.temperature = run_one_experiment.get_model_temperature(args) + ranges = models.LLM.all_llm_temperature_ranges() + assert ranges[args.model][1] >= args.temperature >= ranges[args.model][0], ( + f'--temperature must be within {ranges[args.model][0]} and ' + f'{ranges[args.model][1]}.') benchmark_yaml = args.benchmark_yaml if benchmark_yaml: diff --git a/run_one_experiment.py b/run_one_experiment.py index 9237d90e0a..1588b2ee1c 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -54,7 +54,7 @@ NUM_SAMPLES = 2 MAX_TOKENS: int = 4096 RUN_TIMEOUT: int = 30 -TEMPERATURE: float = 1.0 +TEMPERATURE: float = 0.4 RESULTS_DIR = './results' @@ -311,30 +311,3 @@ def run(benchmark: Benchmark, model: models.LLM, args: argparse.Namespace, return AggregatedResult.from_benchmark_result( _fuzzing_pipelines(benchmark, model, args, work_dirs)) - - -def get_model_temperature(args: argparse.Namespace) -> float: - """Retrieves model temperature default value.""" - default_temperatures = {models.VertexAICodeBisonModel.name: 0.2, - models.VertexAICodeBison32KModel.name: 0.2, - models.GeminiPro.name: 0.9, - models.GeminiUltra.name: 0.2, - models.GeminiExperimental.name: 1.0, - models.GeminiV1D5.name: 1.0, - models.GeminiV2Flash.name: 1.0, - models.GeminiV2.name: 1.0, - models.GeminiV2Think.name: 0.7, - models.ClaudeHaikuV3.name: 0.5, - models.ClaudeOpusV3.name: 0.5, - models.ClaudeSonnetV3D5.name: 0.5, - models.GPT.name: 1.0, - models.GPT4.name: 1.0, - models.GPT4o.name: 1.0, - models.GPT4oMini.name: 1.0, - models.GPT4Turbo.name: 1.0} - if args.model.endswith('-chat') or args.model.endswith('-azure'): - model_name = '-'.join(args.model.split('-')[:-1]) - else: - model_name = args.model - - return default_temperatures[model_name] \ No newline at end of file From c546ea658e80363b418a0c20941b84d64d93d6e0 Mon Sep 17 00:00:00 2001 From: Daniele Berardi Date: Sat, 12 Apr 2025 19:22:51 +0200 Subject: [PATCH 3/4] Introduced a search method for LLMs' attributes. --- llm_toolkit/models.py | 35 +++++++++++++++++++++-------------- run_all_experiments.py | 2 +- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index 0ae15c5f69..e5333f0c13 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -47,6 +47,7 @@ MAX_TOKENS: int = 2000 NUM_SAMPLES: int = 1 TEMPERATURE: float = 0.4 +TEMPERATURE_RANGE: list[float] = [0.0, 2.0] class LLM: @@ -61,7 +62,7 @@ class LLM: _max_attempts = 5 # Maximum number of attempts to get prediction response - temperature_range: list[float] = [0.0, 2.0] # Default model temperature range + temperature_range: list[float] = TEMPERATURE_RANGE def __init__( self, @@ -118,25 +119,31 @@ def all_llm_subclasses(cls): yield cls for subcls in cls.__subclasses__(): yield from subcls.all_llm_subclasses() + + @classmethod + def all_llm_search(cls, attribute: str) -> list: + """Returns the desired attribute for all models.""" + out = [] + for subcls in cls.all_llm_subclasses(): + if (hasattr(subcls, attribute) and hasattr(subcls, 'name') and + subcls.name != AIBinaryModel.name): + out.append(getattr(subcls, attribute)) + return out @classmethod - def all_llm_names(cls): + def all_llm_names(cls) -> list[str]: """Returns the current model name and all child model names.""" - names = [] - for subcls in cls.all_llm_subclasses(): - if hasattr(subcls, 'name') and subcls.name != AIBinaryModel.name: - names.append(subcls.name) - return names + return cls.all_llm_search('name') @classmethod - def all_llm_temperature_ranges(cls): + def all_llm_temperature_ranges(cls) -> dict[str, list[float, float]]: """Returns the current model and all child temperature ranges.""" - ranges = {} - for subcls in cls.all_llm_subclasses(): - if (hasattr(subcls, 'temperature_range') and hasattr(subcls, 'name') - and subcls.name != AIBinaryModel.name): - ranges[subcls.name] = subcls.temperature_range - return ranges + out = {} + names = cls.all_llm_search('name') + tr = cls.all_llm_search('temperature_range') + for i in range(len(names)): + out[names[i]] = tr[i] + return out @abstractmethod def estimate_token_num(self, text) -> int: diff --git a/run_all_experiments.py b/run_all_experiments.py index 4eb4ac35f4..acd70ee182 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -281,7 +281,7 @@ def parse_args() -> argparse.Namespace: ranges = models.LLM.all_llm_temperature_ranges() assert ranges[args.model][1] >= args.temperature >= ranges[args.model][0], ( f'--temperature must be within {ranges[args.model][0]} and ' - f'{ranges[args.model][1]}.') + f'{ranges[args.model][1]} for model {args.model}.') benchmark_yaml = args.benchmark_yaml if benchmark_yaml: From 4339b7d519a28354aa8e717af3c54b3888ee0a41 Mon Sep 17 00:00:00 2001 From: Daniele Berardi Date: Wed, 16 Apr 2025 08:22:23 +0200 Subject: [PATCH 4/4] Modified extraction logic to assign the proper temperature_range to each model --- llm_toolkit/models.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index e5333f0c13..9d9a57f67b 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -121,13 +121,15 @@ def all_llm_subclasses(cls): yield from subcls.all_llm_subclasses() @classmethod - def all_llm_search(cls, attribute: str) -> list: + def all_llm_search(cls, attribute: str = None) -> list: """Returns the desired attribute for all models.""" out = [] for subcls in cls.all_llm_subclasses(): - if (hasattr(subcls, attribute) and hasattr(subcls, 'name') and - subcls.name != AIBinaryModel.name): - out.append(getattr(subcls, attribute)) + if hasattr(subcls, 'name') and subcls.name != AIBinaryModel.name: + if attribute is not None and hasattr(subcls, attribute): + out.append(getattr(subcls, attribute)) + else: + out.append(subcls) return out @classmethod @@ -138,13 +140,12 @@ def all_llm_names(cls) -> list[str]: @classmethod def all_llm_temperature_ranges(cls) -> dict[str, list[float, float]]: """Returns the current model and all child temperature ranges.""" - out = {} - names = cls.all_llm_search('name') - tr = cls.all_llm_search('temperature_range') - for i in range(len(names)): - out[names[i]] = tr[i] - return out - + return { + m.name: m.temperature_range + for m in cls.all_llm_search() + if hasattr(m, 'temperature_range') + } + @abstractmethod def estimate_token_num(self, text) -> int: """Estimates the number of tokens in |text|."""