MultiBLiMP is a massively multilingual benchmark of linguistic minimal pairs, covering 101 languages and 6 linguistic phenomena, and containing more than 125,000 minimal pairs. This repository contains the code for creating the corpus and the scripts for LLM evaluation.
@misc{jumelet2025multiblimp10massivelymultilingual,
  author        = {Jumelet, Jaap and Weissweiler, Leonie and Bisazza, Arianna},
  title         = {{MultiBLiMP} 1.0: A Massively Multilingual Benchmark of Linguistic Minimal Pairs},
  year          = {2025},
  eprint        = {2504.02768},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2504.02768},
}