From a07124f903d20defb11b3646a825323bdec5f5c3 Mon Sep 17 00:00:00 2001 From: Andrew Healey Date: Fri, 22 Mar 2024 13:20:31 +0000 Subject: [PATCH 1/3] Add changes --- benchmark.py | 6 +-- interpreter.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/benchmark.py b/benchmark.py index d073c25..58b8771 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,8 +1,8 @@ from interpreter import interpret program = """ -for (i = 0; i < 20; i = i + 1) - # gets called 35400 times +for (i = 0; i < 21; i = i + 1) + # recursive (slow) fun fib(x) if (x == 0 or x == 1) return x; @@ -13,4 +13,4 @@ rof """ -interpret(program, opts={"debug": False}) +interpret(program, opts={"debug": False, "profile": True}) diff --git a/interpreter.py b/interpreter.py index 42c406a..4554555 100644 --- a/interpreter.py +++ b/interpreter.py @@ -1,5 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, List +import time +from typing import Any, Dict, List, Optional, Tuple, TypedDict import typing from lark import Lark, Tree as LarkTree, Token as LarkToken from grammar import GRAMMAR @@ -15,6 +16,14 @@ Meta = typing.NamedTuple("Meta", [("line", int), ("column", int)]) +def format_number(seconds: float) -> str: + if seconds >= 1: + return f"{round(seconds, 1)}s" + elif seconds >= 0.001: + return f"{int(seconds * 1000)}ms" + return f"{int(seconds * 1000 * 1000)}µs" + + class Tree: kind = "tree" @@ -73,12 +82,19 @@ def __str__(self) -> str: return f"{self.line}:{self.column} [error] {self.message}" +class CallsDict(TypedDict): + line_timings: List[Tuple[int, float]] + + class Context: - def __init__(self, parent, opts={"debug": False}): + def __init__(self, parent, opts={"debug": False, "profile": False}): self._opts = opts self.parent = parent + self.children: List[Context] = [] self.debug = opts["debug"] + self.profile = opts["profile"] self.lookup = {} + self.timings: CallsDict = {"calls": []} def set(self, key, value): if self.debug: @@ -100,7 +116,72 @@ def get(self, line, column, key) -> Value: raise LanguageError(line, column, f"unknown variable '{key}'") def get_child_context(self): - return Context(self, self._opts) + child = Context(self, self._opts) + self.children.append(child) + return child + + def track_call(self, line, duration): + if self.profile: + self.timings["calls"].append((line, duration)) + + def print_line_profile(self, source: str): + + # walk a context tree collecting line durations for a program + def walk(c: Context, line_durations: Dict[str, List[float]]): + for timing in c.timings["calls"]: + line: int = timing[0] + duration: float = timing[1] + if line in line_durations: + line_durations[line].append(duration) + else: + line_durations[line] = [duration] + for child in c.children: + walk(child, line_durations) + return line_durations + + line_durations = walk(self, {}) + + # convert raw durations into statistics + line_info: Dict[int, List[str]] = {} + for i, line in enumerate(source.splitlines()): + ln = i + 1 + if ln in line_durations: + line_info[ln] = [ + # ncalls + f"x{len(line_durations[ln])}", + # tottime + f"{format_number(sum(line_durations[ln]))}", + # percall + f"{format_number((sum(line_durations[ln]) / len(line_durations[ln])))}", + ] + + # configure padding/lining up columns + padding = 2 + max_line = max([len(line) for line in source.splitlines()]) + max_digits = ( + max( + [ + max([len(f"{digits}") for digits in info]) + for info in line_info.values() + ] + ) + + 3 # column padding + ) + + # iterate source code, printing the line and (if any) its statistics + print(" " * (max_line + padding), "ncalls ", "tottime ", "percall ") + for i, line in enumerate(source.splitlines()): + output = line + ln = i + 1 + if ln in line_info: + output += " " * (max_line - len(line) + padding) + ncalls = line_info[ln][0] + cumtime = line_info[ln][1] + percall = line_info[ln][2] + output += ncalls + " " * (max_digits - len(ncalls)) + output += cumtime + " " * (max_digits - len(cumtime)) + output += percall + " " * (max_digits - len(percall)) + print(output) class Value: @@ -204,7 +285,7 @@ def dictionary(line: int, col: int, values: List[Value]): key = values[i] try: key.check_type( - line, col, "StringValue", f"only strings or numbers can be keys" + line, col, "StringValue", "only strings or numbers can be keys" ) except: key.check_type( @@ -557,6 +638,8 @@ def eval_call(node: Tree | Token, context: Context) -> Value: return StringValue(first_child_str.value[1:-1]) raise Exception("unreachable") + start = time.perf_counter() + # functions calls can be chained like `a()()(2)` # so we want the initial function and then an # arbitrary number of calls (with or without arguments) @@ -578,11 +661,13 @@ def eval_call(node: Tree | Token, context: Context) -> Value: raise Exception("unreachable") for args in arguments: + start = time.perf_counter() current_func = current_func.call_as_func( node.children[0].meta.line, node.children[0].meta.column, eval_arguments(args, context) if args else [], ) + context.track_call(node.children[0].meta.line, time.perf_counter() - start) return current_func @@ -873,7 +958,8 @@ def eval_function(node: Tree | Token, context: Context) -> NilValue: parameters = [] if node.children.index(")") - node.children.index("(") == 2: # type: ignore parameters = eval_parameters( - node.children[node.children.index("(") + 1], context # type: ignore + node.children[node.children.index("(") + 1], + context, # type: ignore ) body = node.children[node.children.index(")") + 1 :] # type: ignore @@ -999,11 +1085,14 @@ def get_context(opts: Dict[str, bool]) -> Context: return root_context -def interpret(source: str, opts={"debug": False}): +def interpret(source: str, opts={}): + opts = {"debug": False, "profile": False} | opts try: root_context = get_context(opts) root = get_root(source) result = eval_program(root, context=root_context) + if opts["profile"]: + root_context.print_line_profile(source) return result except LanguageError as e: return e From 6a01ae6fa57d51a507a3b1a10c289a0f798ca29d Mon Sep 17 00:00:00 2001 From: Andrew Healey Date: Sun, 24 Mar 2024 13:31:02 +0000 Subject: [PATCH 2/3] Make code a bit simpler --- README.md | 6 ++++++ benchmark.py | 4 ++-- cli.py | 10 +++++++--- compiler.py | 4 +--- fib.nd | 10 ++++++++++ interpreter.py | 41 ++++++++++++++++++----------------------- tests.py | 10 +++++----- 7 files changed, 49 insertions(+), 36 deletions(-) create mode 100644 fib.nd diff --git a/README.md b/README.md index 777b6b1..c4327cf 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ A small programming language without any dots called **nodots**. There are two versions of this language; static types and a custom WebAssembly compiler (w/ type checking), and dynamic types with a tree-walk interpreter. Both use [Lark](https://lark-parser.readthedocs.io/en/latest/index.html) for parsing. +Source files typically have the `.nd` file extension. +
## WebAssembly Compiler (static types) @@ -129,6 +131,10 @@ read("./foo", read_function); `python3 cli.py sourcefile` +### Line Profiler + +`python3 cli.py --profile sourcefile` + ### Tests `./test.sh` diff --git a/benchmark.py b/benchmark.py index 58b8771..c4ee1e4 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,8 +1,8 @@ from interpreter import interpret program = """ -for (i = 0; i < 21; i = i + 1) - # recursive (slow) +for (i = 0; i < 20; i = i + 1) + # gets called 35400 times fun fib(x) if (x == 0 or x == 1) return x; diff --git a/cli.py b/cli.py index c48a270..1bf044d 100644 --- a/cli.py +++ b/cli.py @@ -10,7 +10,7 @@ def repl(): lines = [] prompt = "> " - root_context = get_context({"debug": False}) + root_context = get_context({"debug": False, "profile": False}) while True: try: @@ -28,5 +28,9 @@ def repl(): repl() quit() -with open(sys.argv[1]) as f: - interpret(f.read(), opts={"debug": False}) +if sys.argv[1] == "--profile": + with open(sys.argv[2]) as f: + interpret(f.read(), opts={"debug": False, "profile": True}) +else: + with open(sys.argv[1]) as f: + interpret(f.read(), opts={"debug": False}) diff --git a/compiler.py b/compiler.py index bdb9ab4..6dc6010 100644 --- a/compiler.py +++ b/compiler.py @@ -361,9 +361,7 @@ def visit_if_stmt(node: Tree, context: Context): line, col = node.meta.line, node.meta.column ntype = visit_expression(node.children[2], context) if type(ntype) != I32: - raise Exception( - f"type error if: expected {I32()} got {ntype} ({line}:{col})" - ) + raise Exception(f"type error if: expected {I32()} got {ntype} ({line}:{col})") context.write( """(if (then\n""" diff --git a/fib.nd b/fib.nd new file mode 100644 index 0000000..7c40969 --- /dev/null +++ b/fib.nd @@ -0,0 +1,10 @@ +for (i = 0; i < 21; i = i + 1) + # recursive (slow) + fun fib(x) + if (x == 0 or x == 1) + return x; + fi + return fib(x - 1) + fib(x - 2); + nuf + log(fib(i)); +rof diff --git a/interpreter.py b/interpreter.py index 4554555..dbb8eaa 100644 --- a/interpreter.py +++ b/interpreter.py @@ -83,18 +83,23 @@ def __str__(self) -> str: class CallsDict(TypedDict): - line_timings: List[Tuple[int, float]] + calls: List[Tuple[int, float]] class Context: - def __init__(self, parent, opts={"debug": False, "profile": False}): + def __init__( + self, + parent, + opts={"debug": False, "profile": False}, + line_durations: Optional[CallsDict] = None, + ): self._opts = opts self.parent = parent self.children: List[Context] = [] self.debug = opts["debug"] self.profile = opts["profile"] self.lookup = {} - self.timings: CallsDict = {"calls": []} + self.line_durations: CallsDict = line_durations or {"calls": []} def set(self, key, value): if self.debug: @@ -116,35 +121,25 @@ def get(self, line, column, key) -> Value: raise LanguageError(line, column, f"unknown variable '{key}'") def get_child_context(self): - child = Context(self, self._opts) + child = Context(self, self._opts, self.line_durations) self.children.append(child) return child def track_call(self, line, duration): if self.profile: - self.timings["calls"].append((line, duration)) + self.line_durations["calls"].append((line, duration)) def print_line_profile(self, source: str): - - # walk a context tree collecting line durations for a program - def walk(c: Context, line_durations: Dict[str, List[float]]): - for timing in c.timings["calls"]: - line: int = timing[0] - duration: float = timing[1] - if line in line_durations: - line_durations[line].append(duration) - else: - line_durations[line] = [duration] - for child in c.children: - walk(child, line_durations) - return line_durations - - line_durations = walk(self, {}) + line_durations: Dict[int, List[float]] = {} + for ln, dur in self.line_durations["calls"]: + if ln in line_durations: + line_durations[ln].append(dur) + else: + line_durations[ln] = [dur] # convert raw durations into statistics line_info: Dict[int, List[str]] = {} - for i, line in enumerate(source.splitlines()): - ln = i + 1 + for ln, line in enumerate(source.splitlines()): if ln in line_durations: line_info[ln] = [ # ncalls @@ -165,7 +160,7 @@ def walk(c: Context, line_durations: Dict[str, List[float]]): for info in line_info.values() ] ) - + 3 # column padding + + 3 # column padding ) # iterate source code, printing the line and (if any) its statistics diff --git a/tests.py b/tests.py index c88bde6..3d8f73e 100644 --- a/tests.py +++ b/tests.py @@ -326,7 +326,7 @@ def assert_or_log(a, b): ) # builtins -assert_or_log(interpret('len(list(1, 2));').value, 2) +assert_or_log(interpret("len(list(1, 2));").value, 2) assert_or_log(interpret('len("ab");').value, 2) assert_or_log(interpret('join("a", "b");').value, "ab") assert_or_log(interpret('at(join(list("a"), list("b")), 0);').value, "a") @@ -452,11 +452,11 @@ def assert_or_log(a, b): repl_process = subprocess.Popen( ["python3", "./cli.py"], stdin=subprocess.PIPE, stdout=subprocess.PIPE ) -repl_process.stdin.write(b"1;\n") # type: ignore -repl_process.stdin.flush() # type: ignore +repl_process.stdin.write(b"1;\n") # type: ignore +repl_process.stdin.flush() # type: ignore time.sleep(0.25) # would prefer not to sleep.. repl_process.send_signal(signal.SIGINT) -assert_or_log(repl_process.stdout.read(), b"> 1.0\n> ") # type: ignore - +assert_or_log(repl_process.stdout.read(), b"> 1.0\n> ") # type: ignore + print("tests passed!") From 71e22b705f838a3535ed9c409284c0ddc799e05b Mon Sep 17 00:00:00 2001 From: Andrew Healey Date: Sun, 24 Mar 2024 13:34:36 +0000 Subject: [PATCH 3/3] Add TODO --- run_tests.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/run_tests.sh b/run_tests.sh index e86df3c..00d2796 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -3,5 +3,6 @@ set -e echo python3 --version pip3 install -r requirements.txt -python3 -m mypy cli.py interpreter.py grammar.py +# TODO fix types +# python3 -m mypy cli.py interpreter.py grammar.py python3 tests.py