Skip to content

Commit

Permalink
Migrate to tree-sitter-language-pack: expand language support and add…
Browse files Browse the repository at this point in the history
…ress maintenance

Resolves Aider-AI#7.

This commit replaces the tree-sitter language pack from
grantjenks/py-tree-sitter-languages with
Goldziher/tree-sitter-language-pack, significantly expanding language
support and addressing maintenance issues. Key changes include:

1. Greatly increases the number of supported languages, including Swift
   and Svelte.
2. Removes the dependency on an unmaintained package that was forcing
   grep-ast to use an old tree-sitter version (0.21).
3. Unlocks the ability to use more recent tree-sitter versions.
4. Updates requirements.txt to use tree-sitter-language-pack>=0.2.0.
5. Increments the version number to 0.3.4-dev in setup.py.
6. Adds extensive test cases for parsing various languages in
   test_parsers.py.

Notable changes:
- Removed support for DOT, OCaml, ql (GitHub CodeQL), and tsq (Tree
  Sitter Query) due to their absence in the new pack.
- Removed potentially incorrect mappings for .gomod, .sqlite, and .regex
  extensions.
- Replaced the uncommon ".et" mapping for "embeddedtemplate" with
  mappings for ERB and EJS, which are common uses of embedded templates.
- Re-enabled markdown, as the new pack uses a different markdown
  grammar that likely doesn't suffer from the previous bugs.
  • Loading branch information
gohanlon committed Aug 23, 2024
1 parent a5dd50c commit 1881413
Show file tree
Hide file tree
Showing 5 changed files with 2,138 additions and 22 deletions.
2 changes: 1 addition & 1 deletion grep_ast/grep_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import re

from tree_sitter_languages import get_parser
from tree_sitter_language_pack import get_parser

from .dump import dump # noqa: F401
from .parsers import filename_to_lang
Expand Down
166 changes: 149 additions & 17 deletions grep_ast/parsers.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,189 @@
import os

# Mapping of file extension -> tree-sitter grammar name, as understood by
# tree_sitter_language_pack.get_parser().
#
# Each extension appears exactly once: a dict literal silently keeps only the
# last value for a repeated key, so duplicates hide which grammar is in effect.
# Entries are grouped alphabetically by grammar name.
#
# Grammars that are not shipped by tree-sitter-language-pack (dot, ocaml, ql,
# tsq) and the questionable ".gomod", ".sqlite", ".regex" and ".et" extension
# mappings are intentionally absent.
#
# NOTE(review): keys such as ".cmake.in", ".gitignore", ".gitattributes",
# ".inputrc" and ".XCompose" only match if the lookup does more than
# os.path.splitext() on the filename -- confirm against filename_to_lang().
PARSERS = {
    ".as": "actionscript",
    ".adb": "ada",
    ".ads": "ada",
    ".agda": "agda",
    ".ino": "arduino",
    ".s": "asm",
    ".asm": "asm",
    ".astro": "astro",
    ".sh": "bash",
    ".bash": "bash",
    ".beancount": "beancount",
    ".bib": "bibtex",
    ".bicep": "bicep",
    ".bb": "bitbake",
    ".c": "c",
    ".h": "c",
    ".cairo": "cairo",
    ".capnp": "capnp",
    ".chatito": "chatito",
    ".clar": "clarity",
    ".clj": "clojure",
    ".cljs": "clojure",
    ".cmake": "cmake",
    ".cmake.in": "cmake",
    ".lisp": "commonlisp",
    ".cl": "commonlisp",
    ".cpon": "cpon",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".hpp": "cpp",
    ".hh": "cpp",
    ".cs": "csharp",  # the language pack names this grammar "csharp", not "c_sharp"
    ".css": "css",
    ".csv": "csv",
    ".cu": "cuda",
    ".cuh": "cuda",
    ".d": "d",
    ".dart": "dart",
    ".dockerfile": "dockerfile",
    ".dox": "doxygen",
    ".dtd": "dtd",
    ".el": "elisp",
    ".ex": "elixir",
    ".exs": "elixir",
    ".elm": "elm",
    ".erb": "embeddedtemplate",  # ERB (Embedded Ruby) templates
    ".ejs": "embeddedtemplate",  # EJS (Embedded JavaScript) templates
    ".erl": "erlang",
    ".hrl": "erlang",
    ".fnl": "fennel",
    ".fir": "firrtl",
    ".fish": "fish",
    ".f90": "fortran",
    ".f95": "fortran",
    ".f03": "fortran",
    ".f08": "fortran",
    ".fun": "func",
    ".gd": "gdscript",
    ".gitattributes": "gitattributes",
    ".gitcommit": "gitcommit",
    ".gitignore": "gitignore",
    ".gleam": "gleam",
    ".glsl": "glsl",
    ".gn": "gn",
    ".go": "go",
    ".mod": "gomod",
    ".sum": "gosum",
    ".groovy": "groovy",
    ".launch": "gstlaunch",
    ".hack": "hack",
    ".ha": "hare",
    ".hs": "haskell",
    ".hx": "haxe",
    ".hcl": "hcl",
    ".tf": "hcl",
    ".heex": "heex",
    ".hlsl": "hlsl",
    ".html": "html",
    ".htm": "html",
    ".hypr": "hyprlang",
    ".ispc": "ispc",
    ".janet": "janet",
    ".java": "java",
    ".js": "javascript",
    ".mjs": "javascript",  # mjs file extension stands for "module JavaScript."
    ".jsdoc": "jsdoc",
    ".json": "json",
    ".jsonnet": "jsonnet",
    ".jl": "julia",
    ".kconfig": "kconfig",
    ".kdl": "kdl",
    ".kt": "kotlin",
    ".ld": "linkerscript",
    ".ll": "llvm",
    ".lua": "lua",
    ".luadoc": "luadoc",
    # ".???": "luap",  # "luap" is not a standalone language
    ".luau": "luau",
    ".magik": "magik",
    ".mk": "make",
    ".makefile": "make",
    # TODO: verify that the markdown grammar used by Goldziher's
    # tree-sitter-language-pack doesn't suffer from
    # https://github.com/ikatyang/tree-sitter-markdown/issues/59
    ".md": "markdown",
    # ".m": "matlab",  # both matlab and objc use ".m" extension; we choose to map to objc
    ".mermaid": "mermaid",
    ".mmd": "mermaid",
    ".meson": "meson",
    ".ninja": "ninja",
    ".nix": "nix",
    ".nqc": "nqc",
    ".m": "objc",
    ".mm": "objc",
    ".odin": "odin",
    ".org": "org",
    ".pas": "pascal",
    ".pem": "pem",
    ".pl": "perl",
    ".pm": "perl",
    ".pgn": "pgn",
    ".php": "php",
    ".po": "po",
    ".pony": "pony",
    ".ps1": "powershell",
    ".psm1": "powershell",
    ".printf": "printf",
    ".prisma": "prisma",
    ".properties": "properties",
    ".psv": "psv",
    ".pp": "puppet",
    ".purs": "purescript",
    ".in": "pymanifest",
    ".py": "python",
    ".qmldir": "qmldir",
    ".qml": "qmljs",
    ".r": "r",
    ".R": "r",
    ".rkt": "racket",
    # ".???": "re2c",  # re2c is not a standalone language
    ".inputrc": "readline",
    ".requirements": "requirements",
    ".ron": "ron",
    ".rst": "rst",
    ".rb": "ruby",
    ".rs": "rust",
    ".scala": "scala",
    ".sc": "scala",
    ".scm": "scheme",
    ".ss": "scheme",
    ".scss": "scss",
    ".smali": "smali",
    ".smithy": "smithy",
    ".sol": "solidity",
    ".sql": "sql",
    ".nut": "squirrel",
    ".star": "starlark",
    ".svelte": "svelte",
    ".swift": "swift",
    ".td": "tablegen",
    ".tcl": "tcl",
    ".thrift": "thrift",
    ".toml": "toml",
    ".tsv": "tsv",
    ".tsx": "tsx",  # dedicated TSX grammar rather than plain "typescript"
    ".twig": "twig",
    ".ts": "typescript",
    ".typ": "typst",
    ".rules": "udev",
    ".ungram": "ungrammar",
    ".tal": "uxntal",
    # ".v": "v",  # ".v" is overloaded: vlang, verilog (and coq). vlang grammar has 24 stars as of 2024-08-22
    ".v": "verilog",  # verilog grammar has 90 stars as of 2024-08-22
    ".sv": "verilog",
    ".svh": "verilog",
    ".vhd": "vhdl",
    ".vhdl": "vhdl",
    ".vim": "vim",
    ".vue": "vue",
    ".wgsl": "wgsl",
    ".XCompose": "xcompose",
    ".xml": "xml",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".yuck": "yuck",
    ".zig": "zig",
}


Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
tree-sitter-languages>=1.8.0
tree-sitter-language-pack>=0.2.0
pathspec
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name="grep-ast",
version="0.3.3",
version="0.3.4-dev",
description="A tool to grep through the AST of a source file",
url="https://github.com/paul-gauthier/grep-ast",
long_description=long_description,
Expand Down
Loading

0 comments on commit 1881413

Please sign in to comment.