Skip to content

Commit 1aeddef

Browse files
authored
Merge pull request #34 from tmr232/scan-codebase
Create script for scanning full codebase
2 parents cd35cfa + f8cee8f commit 1aeddef

13 files changed

+173
-55
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how
1717
- The `generate-parsers.ts` script has been updated to support copying existing `.wasm` files from tree-sitter grammar packages
1818
- Initial support for C++
1919
- A basic [typedoc](https://typedoc.org/) configuration was added, to help in rendering docs
20+
- A utility script for running CFG builders on a complete codebase (`scan-codebase.ts`)
2021

2122
### Changed
2223

docs/AddNewLanguage.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ Search for `ADD-LANGUAGES-HERE` in the code, and add the language in all the rel
7373
Those will include:
7474

7575
- Language & builder definitions in `src/control-flow/cfg.ts`
76-
- Mapping languages to `.wasm` files in `src/components/utils.ts`
76+
- Mapping languages to `.wasm` files in `src/parser-loader/wasmMapping.ts`
7777
- Mapping VSCode's `languageId` to our language definitions in `src/vscode/extension.ts`
7878
- Adding test-collectors and tests in `src/test/commentTestCollector.ts`
7979
- Adding the language in the demo's UI in `src/components/Demo.svelte`

scripts/generate-parsers.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* parsers in to the `./parsers` directory.
44
*
55
* To add a new parser, add its package name to the `parsersToBuild` array.
6+
* @module
67
*/
78
import { $ } from "bun";
89
import * as fs from "node:fs";

scripts/scan-codebase.ts

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/**
2+
* This script allows running the CFG builders on all the functions of a given
3+
* code base.
4+
* This can be useful in finding CFG generation bugs.
5+
*
6+
* @module
7+
*/
8+
import { parseArgs } from "node:util";
9+
import {
10+
functionNodeTypes,
11+
type Language,
12+
newCFGBuilder,
13+
supportedLanguages,
14+
} from "../src/control-flow/cfg";
15+
import { Glob } from "bun";
16+
import type Parser from "web-tree-sitter";
17+
import { initializeParser } from "../src/parser-loader/bun.ts";
18+
import * as path from "node:path";
19+
20+
/** Pairs a file extension (written without the leading dot) with the language used to parse it. */
interface FileType {
  ext: string;
  language: Language;
}

/**
 * Extensions picked up by the scan, each with the language it is parsed as.
 * Note that `.h` headers are parsed as C++.
 */
const fileTypes: FileType[] = [
  { ext: "c", language: "C" },
  { ext: "cpp", language: "C++" },
  { ext: "h", language: "C++" },
  { ext: "hh", language: "C++" },
  { ext: "hpp", language: "C++" },
  { ext: "cc", language: "C++" },
  { ext: "py", language: "Python" },
  { ext: "go", language: "Go" },
];
31+
32+
/**
 * One initialized parser per supported language, keyed by language.
 * Built at module load; the parsers are initialized one after another.
 */
const parsers: { [language in Language]: Parser } = await (async () => {
  const entries = [];
  for (const language of supportedLanguages) {
    const { parser } = await initializeParser(language);
    entries.push([language, parser]);
  }
  return Object.fromEntries(entries);
})();
41+
42+
/** Lookup from a dotted extension (e.g. `.py`) to the language it is parsed as. */
const extToLanguage: Map<string, Language> = new Map();
for (const { ext, language } of fileTypes) {
  extToLanguage.set(`.${ext}`, language);
}
45+
46+
/**
 * Determines the language of a source file from its extension.
 *
 * The extension is lower-cased before it is looked up in `extToLanguage`.
 *
 * @param filename Name or path of the source file.
 * @returns The language registered for the file's extension.
 * @throws Error when the extension has no registered language.
 */
function getLanguage(filename: string): Language {
  const extension = path.extname(filename).toLowerCase();
  const match = extToLanguage.get(extension);
  if (match) {
    return match;
  }
  throw new Error(`Unsupported extension ${extension}`);
}
54+
55+
/**
 * Lists the source files under `root` whose extension appears in `fileTypes`.
 *
 * @param root Directory handed to `Glob.scanSync`.
 * @returns The paths produced by the glob scan; `main` joins them onto `root`.
 */
function iterSourceFiles(root: string): IterableIterator<string> {
  const extensions = fileTypes.map((fileType) => fileType.ext);
  // Brace expansion: matches any file, at any depth, ending in one of the extensions.
  const pattern = `**/*.{${extensions.join(",")}}`;
  return new Glob(pattern).scanSync(root);
}
61+
62+
/**
 * Parses `code` and yields every syntax node that is a function definition
 * for `language` (as listed in `functionNodeTypes`), in pre-order.
 *
 * The parse tree and its cursor are allocated by web-tree-sitter and must be
 * released explicitly. They are deleted once iteration finishes or is
 * abandoned, so a yielded node must be fully processed before requesting the
 * next one and must not be retained after the iteration ends.
 *
 * @param code Source text to parse.
 * @param language Language whose parser and function node types are used.
 * @returns An iterator over the function nodes found in `code`.
 */
function* iterFunctions(
  code: string,
  language: Language,
): IterableIterator<Parser.SyntaxNode> {
  const tree = parsers[language].parse(code);
  const cursor = tree.walk();
  const functionTypes = functionNodeTypes[language];

  // Recursive pre-order walk driven by the shared cursor; on return the
  // cursor is back on the node it started from.
  function* visitNode(): IterableIterator<Parser.SyntaxNode> {
    if (functionTypes.includes(cursor.nodeType)) {
      yield cursor.currentNode;
    }

    if (cursor.gotoFirstChild()) {
      do {
        yield* visitNode();
      } while (cursor.gotoNextSibling());
      cursor.gotoParent();
    }
  }

  try {
    yield* visitNode();
  } finally {
    // Without this, every scanned file would leak its tree; the `finally`
    // also covers consumers that stop iterating early.
    cursor.delete();
    tree.delete();
  }
}
83+
84+
/**
 * Entry point: builds a CFG for every function in every supported source
 * file under `--root` (default: the current directory) and logs, per
 * function, the file path, the function's start position and the CFG's
 * graph order.
 */
async function main() {
  const parsed = parseArgs({
    args: Bun.argv,
    options: { root: { type: "string" } },
    strict: true,
    allowPositionals: true,
  });
  const root = parsed.values.root ?? ".";

  for (const sourceFile of iterSourceFiles(root)) {
    const filepath = path.join(root, sourceFile);
    const code = await Bun.file(filepath).text();
    const language = getLanguage(sourceFile);

    for (const func of iterFunctions(code, language)) {
      // A fresh builder is created for each function.
      const { graph } = newCFGBuilder(language, {}).buildCFG(func);
      console.log(filepath, func.startPosition, graph.order);
    }
  }
}

await main();

src/components/utils.ts

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,18 @@
1-
import Parser from "web-tree-sitter";
2-
3-
import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url";
4-
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url";
5-
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url";
6-
import treeSitterCore from "../../parsers/tree-sitter.wasm?url";
7-
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url";
1+
import type Parser from "web-tree-sitter";
82
import {
9-
newCFGBuilder,
3+
functionNodeTypes,
104
type Language,
5+
newCFGBuilder,
116
supportedLanguages,
12-
functionNodeTypes,
137
} from "../control-flow/cfg";
148
import type { TestFuncRecord } from "../test/commentTestUtils";
159
import type { TestFunction } from "../test/commentTestTypes";
1610
import { requirementTests } from "../test/commentTestHandlers";
1711
import { simplifyCFG, trimFor } from "../control-flow/graph-ops";
1812
import { type CFG, mergeNodeAttrs } from "../control-flow/cfg-defs";
1913
import { graphToDot } from "../control-flow/render";
20-
import { Graphviz, type Format } from "@hpcc-js/wasm-graphviz";
21-
22-
// ADD-LANGUAGES-HERE
23-
const wasmMapping: { [language in Language]: string } = {
24-
C: treeSitterC,
25-
Go: treeSitterGo,
26-
Python: treeSitterPython,
27-
"C++": treeSitterCpp,
28-
};
29-
30-
async function initializeParser(language: Language) {
31-
await Parser.init({
32-
locateFile(_scriptName: string, _scriptDirectory: string) {
33-
return treeSitterCore;
34-
},
35-
});
36-
const parserLanguage = await Parser.Language.load(wasmMapping[language]);
37-
const parser = new Parser();
38-
parser.setLanguage(parserLanguage);
39-
return parser;
40-
}
14+
import { type Format, Graphviz } from "@hpcc-js/wasm-graphviz";
15+
import { initializeParser } from "../parser-loader/vite.ts";
4116

4217
export type Parsers = { [language in Language]: Parser };
4318

src/parser-loader/bun.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import type { Language } from "../control-flow/cfg.ts";
2+
import Parser from "web-tree-sitter";
3+
import { wasmMapping } from "./wasmMapping.ts";
4+
5+
/**
 * Creates a tree-sitter parser for `language`.
 *
 * Initializes web-tree-sitter with its default options, loads the grammar
 * registered for `language` in `wasmMapping`, and attaches it to a new parser.
 *
 * @param language Language whose grammar should be loaded.
 * @returns The configured parser together with the loaded grammar.
 */
export async function initializeParser(
  language: Language,
): Promise<{ parser: Parser; language: Parser.Language }> {
  await Parser.init();

  const grammar = await Parser.Language.load(wasmMapping[language]);
  const parser = new Parser();
  parser.setLanguage(grammar);

  return { parser, language: grammar };
}

src/parser-loader/vite.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import type { Language } from "../control-flow/cfg.ts";
2+
import Parser from "web-tree-sitter";
3+
import treeSitterCore from "../../parsers/tree-sitter.wasm?url";
4+
import { wasmMapping } from "./wasmMapping.ts";
5+
6+
/**
 * Creates a tree-sitter parser for `language`.
 *
 * web-tree-sitter is initialized with a `locateFile` hook that always
 * answers with the `tree-sitter.wasm` URL imported by this module; the
 * grammar for `language` is then loaded from `wasmMapping`.
 *
 * @param language Language whose grammar should be loaded.
 * @returns A parser with the grammar for `language` set.
 */
export async function initializeParser(language: Language): Promise<Parser> {
  await Parser.init({
    locateFile: (_scriptName: string, _scriptDirectory: string) =>
      treeSitterCore,
  });

  const grammar = await Parser.Language.load(wasmMapping[language]);
  const parser = new Parser();
  parser.setLanguage(grammar);
  return parser;
}

src/parser-loader/wasmMapping.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import type { Language } from "../control-flow/cfg.ts";
2+
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url";
3+
import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url";
4+
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url";
5+
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url";
6+
7+
// ADD-LANGUAGES-HERE
/**
 * Maps each supported language to its tree-sitter grammar `.wasm`, as
 * resolved by this module's `?url` imports.
 */
export const wasmMapping: Record<Language, string> = {
  C: treeSitterC,
  Go: treeSitterGo,
  Python: treeSitterPython,
  "C++": treeSitterCpp,
};

src/test/collect-c.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import type Parser from "web-tree-sitter";
2-
import treeSitterC from "../../parsers/tree-sitter-c.wasm?url";
32
import { parseComment } from "./commentTestUtils";
43
import type { TestFunction } from "./commentTestTypes";
5-
import { initializeParser } from "./parser-init";
64

7-
const { parser, language } = await initializeParser(treeSitterC);
5+
import { initializeParser } from "../parser-loader/bun.ts";
6+
7+
const { parser, language } = await initializeParser("C");
88

99
export function getTestFuncs(code: string): Generator<TestFunction> {
1010
const tree = parser.parse(code);

src/test/collect-cpp.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import type Parser from "web-tree-sitter";
2-
import treeSitterCpp from "../../parsers/tree-sitter-cpp.wasm?url";
32
import { parseComment } from "./commentTestUtils";
43
import type { TestFunction } from "./commentTestTypes";
5-
import { initializeParser } from "./parser-init";
64

7-
const { parser, language } = await initializeParser(treeSitterCpp);
5+
import { initializeParser } from "../parser-loader/bun.ts";
6+
7+
const { parser, language } = await initializeParser("C++");
88

99
export function getTestFuncs(code: string): Generator<TestFunction> {
1010
const tree = parser.parse(code);

src/test/collect-go.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import type Parser from "web-tree-sitter";
2-
import treeSitterGo from "../../parsers/tree-sitter-go.wasm?url";
32
import { parseComment } from "./commentTestUtils";
43
import type { TestFunction } from "./commentTestTypes";
5-
import { initializeParser } from "./parser-init";
64

7-
const { parser } = await initializeParser(treeSitterGo);
5+
import { initializeParser } from "../parser-loader/bun.ts";
6+
7+
const { parser } = await initializeParser("Go");
88

99
export function getTestFuncs(code: string): Generator<TestFunction> {
1010
const tree = parser.parse(code);

src/test/collect-python.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import type Parser from "web-tree-sitter";
2-
import treeSitterPython from "../../parsers/tree-sitter-python.wasm?url";
32
import { parseComment } from "./commentTestUtils";
43
import type { TestFunction } from "./commentTestTypes";
5-
import { initializeParser } from "./parser-init";
64

7-
const { parser, language } = await initializeParser(treeSitterPython);
5+
import { initializeParser } from "../parser-loader/bun.ts";
6+
7+
const { parser, language } = await initializeParser("Python");
88

99
export function getTestFuncs(code: string): Generator<TestFunction> {
1010
const tree = parser.parse(code);

src/test/parser-init.ts

Lines changed: 0 additions & 12 deletions
This file was deleted.

0 commit comments

Comments
 (0)