diff --git a/benchmarks/templates/benchmarks/benchmark_table.html b/benchmarks/templates/benchmarks/benchmark_table.html new file mode 100644 index 000000000..4dbe9ff0f --- /dev/null +++ b/benchmarks/templates/benchmarks/benchmark_table.html @@ -0,0 +1,93 @@ +{% load static %} + +{% if models %} + {% if has_user %} +
+ {% endif %} +
+ + + + + + {% for benchmark in benchmarks %} + + {% endfor %} + + + + + {% for model in models %} + + + {% for score_row in model.scores %} + + {% endfor %} + + {% endfor %} + +

+ + Model + +

+

+ {% if benchmark.benchmark_type.parent %} + {# indent, depending on depth #} + + {{ benchmark_parents|get_initial_characters:benchmark.identifier }} + + {% endif %} + + {# reference link #} + {% if benchmark.benchmark_type.reference and benchmark.benchmark_type.reference.url %} + + {% endif %} + + {# identifier #} + {{ benchmark.short_name|simplify_domain }} + + {% if benchmark.benchmark_type.reference and benchmark.benchmark_type.reference.url %} + + {% endif %} +

+ + + {{ score_row.score_ceiled }} +
+
+ +
+ Model scores on brain benchmarks. + Click on a model to see more details. + The more green and bright a cell, the better the model's score. + Scores are ceiled, hover the benchmark to see ceilings. +
+{% if has_user %} +
+{% endif %} +{% else %} +

No data.

+{% endif %} diff --git a/benchmarks/templates/benchmarks/competition2024.html b/benchmarks/templates/benchmarks/competition2024.html index 39f45a7c5..9f754e53f 100644 --- a/benchmarks/templates/benchmarks/competition2024.html +++ b/benchmarks/templates/benchmarks/competition2024.html @@ -130,6 +130,22 @@

Common critiques of the Brain-Score platform + {# leaderboard #} +
+

Competition Leaderboard

+ +

Behavioral Track

+
+ {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_behavior_vision models=models_behavior_vision %} +
+ +

Neural Track

+
+ {% include "benchmarks/benchmark_table.html" with benchmarks=benchmarks_neural_vision models=models_neural_vision %} +
+
+ + {# overview #}

Overview

@@ -254,7 +270,8 @@

Models

effnetb1_cutmixpatch_SAM_
  • effnetb1_cutmixpatch_augmix_robust32_avge4e7_manylayers_324x288 - (Winner of the 2022 competition)
  • + (Winner of the 2022 competition) +
  • resnext101_32x32d_wsl
  • @@ -287,9 +304,11 @@

    Models

  • resnet18-local_aggregation
  • - grcnn_robust_v1 (Top-3 competition 2022)
  • + grcnn_robust_v1 (Top-3 competition 2022) +
  • - custom_model_cv_18_dagger_408 (Top-3 competition 2022)
  • + custom_model_cv_18_dagger_408 (Top-3 competition 2022) +
  • ViT_L_32_imagenet1k
  • diff --git a/benchmarks/views/competition2024.py b/benchmarks/views/competition2024.py index 0f29fec53..afff56589 100644 --- a/benchmarks/views/competition2024.py +++ b/benchmarks/views/competition2024.py @@ -1,6 +1,104 @@ from django.shortcuts import render +from .index import get_context +from ..models import User + def view(request): - context = {} + # model filter + included_models = [ + "cvt_cvt-w24-384-in22k_finetuned-in1k_4", + "resnext101_32x8d_wsl", + "effnetb1_cutmixpatch_SAM_robust32_avge6e8e9e10_manylayers_324x288", + "effnetb1_cutmixpatch_augmix_robust32_avge4e7_manylayers_324x288", + "resnext101_32x32d_wsl", + "effnetb1_272x240", + "resnext101_32x48d_wsl", + "pnasnet_large_pytorch", + "resnet-152_v2_pytorch", + "focalnet_tiny_lrf_in1k", + "hmax", + "alexnet", + "CORnet-S", + "resnet-50-robust", + "voneresnet-50-non_stochastic", + # "resnet18-local_aggregation", # TF no longer supported + # "grcnn_robust_v1", # weights deleted on user server + "custom_model_cv_18_dagger_408", + "ViT_L_32_imagenet1k", + "mobilenet_v2_1-4_224_pytorch", + "pixels", + ] + assert len(included_models) == 19 + model_filter = dict(model__name__in=included_models) + + # benchmark filter + track_benchmarks = { + "behavior_vision": [ + "average_vision", + "behavior_vision", + + "Hebart2023-match", + + "Baker2022", + "Baker2022inverted-accuracy_delta", "Baker2022fragmented-accuracy_delta", + "Baker2022frankenstein-accuracy_delta", + + "tong.Coggan2024_behavior-ConditionWiseAccuracySimilarity", + + "BMD2024", + "BMD2024.texture_1Behavioral-accuracy_distance", + "BMD2024.texture_2Behavioral-accuracy_distance", + "BMD2024.dotted_1Behavioral-accuracy_distance", + "BMD2024.dotted_2Behavioral-accuracy_distance", + + "Maniquet2024", + "Maniquet2024-confusion_similarity", "Maniquet2024-tasks_consistency", + + # "Malania2007", + # "Malania2007.short2", "Malania2007.short4", "Malania2007.short6", "Malania2007.short8", + # "Malania2007.short16", "Malania2007.equal2", 
"Malania2007.long2", "Malania2007.equal16", + # "Malania2007.long16", "Malania2007.vernieracuity-threshold", + # + # "Scialom2024", + # "Scialom2024_rgb-behavioral_accuracy", + # "Scialom2024_phosphenes-all-behavioral_accuracy", + # "Scialom2024_segments-all-behavioral_accuracy", + # "Scialom2024_phosphenes-100-behavioral_accuracy", + # "Scialom2024_segments-100-behavioral_accuracy", + # + # "Ferguson2024", + # "Ferguson2024circle_line-value_delta", "Ferguson2024color-value_delta", + # "Ferguson2024convergence-value_delta", "Ferguson2024eighth-value_delta", + # "Ferguson2024gray_easy-value_delta", "Ferguson2024gray_hard-value_delta", "Ferguson2024half-value_delta", + # "Ferguson2024juncture-value_delta", "Ferguson2024lle-value_delta", "Ferguson2024llh-value_delta", + # "Ferguson2024quarter-value_delta", "Ferguson2024round_f-value_delta", "Ferguson2024round_v-value_delta", + # "Ferguson2024tilted_line-value_delta" + ], + "neural_vision": [ + "average_vision", + "neural_vision", + "V1", "V2", "V4", "IT", + "Bracci2019.anteriorVTC-rdm", + "Coggan2024", + "tong.Coggan2024_fMRI.V1-rdm", + "tong.Coggan2024_fMRI.V2-rdm", + "tong.Coggan2024_fMRI.V4-rdm", + "tong.Coggan2024_fMRI.IT-rdm", + ] + } + admin_user = User.objects.get(id=2) + context = {'leaderboard_keys': ['behavior_vision', 'neural_vision']} + for key, key_benchmarks in track_benchmarks.items(): + benchmark_filter = lambda benchmarks: benchmarks.filter(identifier__in=key_benchmarks) + key_context = get_context(benchmark_filter=benchmark_filter, + model_filter=model_filter, + user=admin_user, + domain="vision", show_public=True, + compute_benchmark_average=True) + key_context[f"benchmarks_{key}"] = key_context['benchmarks'] + key_context[f"models_{key}"] = key_context['models'] + del key_context['benchmarks'], key_context['models'] + context = {**context, **key_context} + return render(request, 'benchmarks/competition2024.html', context) diff --git a/benchmarks/views/index.py b/benchmarks/views/index.py index 
678a3b32c..3d26b6b39 100644 --- a/benchmarks/views/index.py +++ b/benchmarks/views/index.py @@ -39,10 +39,12 @@ def view(request, domain: str): return render(request, 'benchmarks/leaderboard/leaderboard.html', context) -def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_filter=None, show_public=False): +def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_filter=None, show_public=False, + compute_benchmark_average: bool = False): benchmarks = _collect_benchmarks(domain, user_page=True if user is not None else False, benchmark_filter=benchmark_filter) - model_rows = _collect_models(domain, benchmarks, show_public, user, score_filter=model_filter) + model_rows = _collect_models(domain, benchmarks, show_public, user, score_filter=model_filter, + compute_benchmark_average=compute_benchmark_average) # to save vertical space, we strip the lab name in front of benchmarks. uniform_benchmarks = {} # keeps the original benchmark name @@ -77,35 +79,34 @@ def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_ if domain is "vision": citation_domain_url = 'https://www.biorxiv.org/content/early/2018/09/05/407007' citation_domain_title = "Brain-Score: Which Artificial Neural Network for Object Recognition is most " \ - "Brain-Like? " - citation_domain_bibtex = "@article{SchrimpfKubilius2018BrainScore,\n\t\t\t\t" \ - "title={Brain-Score: Which Artificial Neural Network for Object Recognition is most Brain-Like?},\n\t\t\t\t" \ - "author={Martin Schrimpf and Jonas Kubilius and Ha Hong and Najib J. Majaj and " \ - "Rishi Rajalingham and Elias B. Issa and Kohitij Kar and Pouya Bashivan and Jonathan " \ - "Prescott-Roy and Franziska Geiger and Kailyn Schmidt and Daniel L. K. Yamins and James J. DiCarlo},\n\t\t\t\t" \ - "journal={bioRxiv preprint},\n\t\t\t\t" \ - "year={2018},\n\t\t\t\t" \ - "url={https://www.biorxiv.org/content/10.1101/407007v2}\n\t\t\t}" + "Brain-Like?" 
+ citation_domain_bibtex = "@article{SchrimpfKubilius2018BrainScore,\n" \ + "title={Brain-Score: Which Artificial Neural Network for Object Recognition is most Brain-Like?},\n" \ + "author={Martin Schrimpf and Jonas Kubilius and Ha Hong and Najib J. Majaj and " \ + "Rishi Rajalingham and Elias B. Issa and Kohitij Kar and Pouya Bashivan and Jonathan " \ + "Prescott-Roy and Franziska Geiger and Kailyn Schmidt and Daniel L. K. Yamins and James J. DiCarlo},\n" \ + "journal={bioRxiv preprint},\n" \ + "year={2018},\n" \ + "url={https://www.biorxiv.org/content/10.1101/407007v2}\n}" elif domain is "language": citation_domain_url = 'https://www.pnas.org/content/118/45/e2105646118' citation_domain_title = "The neural architecture of language: Integrative modeling converges on predictive processing" - citation_domain_bibtex = "@article{schrimpf2021neural,\n\t\t\t\t" \ - "title={The neural architecture of language: Integrative modeling converges on predictive processing},\n\t\t\t\t" \ - "author={Schrimpf, Martin and Blank, Idan Asher and Tuckute, Greta and Kauf, Carina " \ - "and Hosseini, Eghbal A and Kanwisher, Nancy and Tenenbaum, Joshua B and Fedorenko, Evelina},\n\t\t\t\t" \ - "journal={Proceedings of the National Academy of Sciences},\n\t\t\t\t" \ - "volume={118},\n\t\t\t\t" \ - "number={45},\n\t\t\t\t" \ - "pages={e2105646118},\n\t\t\t\t" \ - "year={2021},\n\t\t\t\t" \ - "publisher={National Acad Sciences}\n\t\t\t" \ - "}" + citation_domain_bibtex = "@article{schrimpf2021neural,\n" \ + "title={The neural architecture of language: Integrative modeling converges on predictive processing},\n" \ + "author={Schrimpf, Martin and Blank, Idan Asher and Tuckute, Greta and Kauf, Carina " \ + "and Hosseini, Eghbal A and Kanwisher, Nancy and Tenenbaum, Joshua B and Fedorenko, Evelina},\n" \ + "journal={Proceedings of the National Academy of Sciences},\n" \ + "volume={118},\n" \ + "number={45},\n" \ + "pages={e2105646118},\n" \ + "year={2021},\n" \ + "publisher={National Acad 
Sciences}\n" \ + "}" else: citation_domain_url = '' citation_domain_title = '' citation_domain_bibtex = '' - benchmark_names = [b.identifier for b in list(filter(lambda b: b.number_of_all_children == 0, benchmarks))] return {'domain': domain, 'models': model_rows, 'benchmarks': benchmarks, 'benchmark_names': benchmark_names, @@ -115,14 +116,14 @@ def get_context(user=None, domain: str = "vision", benchmark_filter=None, model_ "comparison_data": json.dumps(comparison_data), 'citation_general_url': 'https://www.cell.com/neuron/fulltext/S0896-6273(20)30605-X', 'citation_general_title': 'Integrative Benchmarking to Advance Neurally Mechanistic Models of Human Intelligence', - 'citation_general_bibtex': '@article{Schrimpf2020integrative,\n\t\t\t\t' + 'citation_general_bibtex': '@article{Schrimpf2020integrative,\n' 'title={Integrative Benchmarking to Advance ' - 'Neurally Mechanistic Models of Human Intelligence},\n\t\t\t\t' + 'Neurally Mechanistic Models of Human Intelligence},\n' 'author={Schrimpf, Martin and Kubilius, Jonas and Lee, Michael J and Murty, ' - 'N Apurva Ratan and Ajemian, Robert and DiCarlo, James J},\n\t\t\t\t' - 'journal={Neuron},\n\t\t\t\t' - 'year={2020},\n\t\t\t\t' - 'url={https://www.cell.com/neuron/fulltext/S0896-6273(20)30605-X}\n\t\t\t}', + 'N Apurva Ratan and Ajemian, Robert and DiCarlo, James J},\n' + 'journal={Neuron},\n' + 'year={2020},\n' + 'url={https://www.cell.com/neuron/fulltext/S0896-6273(20)30605-X}\n}', 'citation_domain_url': citation_domain_url, 'citation_domain_title': citation_domain_title, 'citation_domain_bibtex': citation_domain_bibtex, @@ -236,7 +237,8 @@ def _collect_submittable_benchmarks(benchmarks, user): return benchmark_selection -def _collect_models(domain: str, benchmarks, show_public, user=None, score_filter=None): +def _collect_models(domain: str, benchmarks, show_public, user=None, score_filter=None, + compute_benchmark_average: bool = False): """ :param user: The user whose profile we are currently on, if any """ 
@@ -388,7 +390,7 @@ def _collect_models(domain: str, benchmarks, show_public, user=None, score_filte color=representative_color(None, min_value=0, max_value=1), comment="") # - convert scores DataFrame into rows - data = [] + model_rows = [] for model_id, group in tqdm(scores.groupby('model'), desc='model rows'): model_scores = {} # fill in computed scores @@ -444,10 +446,31 @@ def _collect_models(domain: str, benchmarks, show_public, user=None, score_filte scores=model_scores, rank=rank, build_status=build_status, submitter=submitter, submission_id=submission_id, jenkins_id=jenkins_id, timestamp=timestamp ) - data.append(model_row) - data = list(sorted(data, key=lambda model_row: model_row.rank)) + model_rows.append(model_row) + + model_rows = list(sorted(model_rows, key=lambda model_row: model_row.rank)) + + if compute_benchmark_average: + benchmark_averages = scores.fillna(0).groupby('benchmark').mean() + model_scores = [ScoreDisplay( + benchmark=benchmark.identifier, + versioned_benchmark_identifier=benchmark_averages.loc[benchmark.identifier]['benchmark_version'], + score_ceiled=represent(benchmark_averages.loc[benchmark.identifier]['score_ceiled']), + score_raw=benchmark_averages.loc[benchmark.identifier]['score_raw'], + error=None, + color='gray', comment=None) + for benchmark in benchmarks] + average_row = ModelRow( + id=None, + name="Benchmark average", + reference_identifier=None, reference_link=None, + user=None, public=True, competition=None, domain=domain, + scores=model_scores, rank=None, build_status=None, + submitter=None, submission_id=None, jenkins_id=None, timestamp=None + ) + model_rows.insert(0, average_row) - return data + return model_rows def _get_benchmark_shortname(benchmark_type_identifier: str): @@ -554,6 +577,7 @@ def get_visibility(model, user): else: return "public" + # Adds python functions so the HTML can do several things @register.filter def get_item(dictionary, key):