-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathwordcount.py
executable file
·129 lines (104 loc) · 3.55 KB
/
wordcount.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env -S uv run
# /// script
# dependencies = [
# "rich>=13.7.0",
# ]
# ///
"""
Word Counter Tool for gptme
Counts words, lines, and characters in text from files or stdin.
Usage:
./wordcount.py [file...] # Count words in files
echo "text" | ./wordcount.py # Count words from stdin
./wordcount.py < file.txt # Count words from redirected input
Example in gptme:
Assistant: Let me count the words in README.md
```shell
./scripts/wordcount.py README.md
```
"""
import fileinput
import sys
from collections import Counter
from typing import Iterator
from rich import print
from rich.console import Console
from rich.table import Table
# Module-level rich console; main() uses it to print the results table.
console = Console()
def count_text(text: str) -> tuple[int, int, int, Counter[str]]:
    """Count lines, words, characters, and word frequencies in ``text``.

    Words are the whitespace-separated tokens produced by ``str.split()``;
    they are compared case-sensitively and keep any attached punctuation.

    Args:
        text: The full text to analyse.

    Returns:
        A ``(lines, words, chars, word_freq)`` tuple. ``chars`` is
        ``len(text)`` and therefore includes whitespace and newlines.
        Empty or whitespace-only input yields ``(0, 0, 0, Counter())``.
    """
    # Blank input counts as nothing at all; bail out before doing any work.
    if not text.strip():
        return 0, 0, 0, Counter()

    # str.split() with no separator never produces empty strings, so the
    # tokens need no further filtering.
    words = text.split()

    # Non-blank text always contains at least one line, so no fallback is
    # needed for the line count.
    return len(text.splitlines()), len(words), len(text), Counter(words)
def process_input(files: list[str]) -> Iterator[tuple[str, str]]:
    """Read each input in turn and yield ``(source, content)`` pairs.

    Args:
        files: Paths to read. When empty, standard input is read instead.

    Yields:
        One ``(source, content)`` pair per input that produced at least one
        line, where ``source`` is the name reported by ``fileinput`` and
        ``content`` is the input's complete text.

    Raises:
        SystemExit: With exit code 1, after printing an error message, when
            an input cannot be opened or read (missing file, directory,
            permission problem, ...).
    """
    try:
        with fileinput.input(files=files if files else ("-",)) as f:
            current_file = None
            current_content: list[str] = []
            for line in f:
                # isfirstline() marks a file boundary even when the same
                # path is listed twice in a row; comparing filenames would
                # silently merge such inputs into one result.
                if f.isfirstline():
                    if current_file is not None:
                        yield current_file, "".join(current_content)
                    current_file = f.filename()
                    current_content = []
                current_content.append(line)
            # Flush the last input once iteration is exhausted.
            if current_file is not None:
                yield current_file, "".join(current_content)
    except OSError as e:
        # OSError covers FileNotFoundError as well as directories and
        # unreadable files, so all of them get the same clean error exit.
        print(f"[red]Error: {e}[/red]")
        sys.exit(1)
def main() -> None:
    """Count every input named on the command line and print a table.

    Each source yielded by ``process_input(sys.argv[1:])`` is passed to
    ``count_text`` and becomes one table row showing its line, word, and
    character counts plus its three most common words. When more than one
    source was processed, a bold "Total" row with the summed counts is
    appended.
    """
    # Process all input
    total_lines = total_words = total_chars = 0
    results: list[dict[str, object]] = []

    for source, content in process_input(sys.argv[1:]):
        lines, words, chars, freq = count_text(content)
        total_lines += lines
        total_words += words
        total_chars += chars

        # Get top 3 words with counts
        top_words = [f"{w}({c})" for w, c in freq.most_common(3)]

        results.append(
            {
                # fileinput reports standard input as "<stdin>" rather than
                # "-", so accept both spellings when choosing the label.
                "source": "stdin" if source in ("-", "<stdin>") else source,
                "lines": lines,
                "words": words,
                "chars": chars,
                # Joining an empty list already gives "".
                "top_words": ", ".join(top_words),
            }
        )

    # Create and print results table
    table = Table(title="Word Count Results")
    table.add_column("Source", style="cyan")
    table.add_column("Lines", justify="right", style="green")
    table.add_column("Words", justify="right", style="green")
    table.add_column("Chars", justify="right", style="green")
    table.add_column("Most Common Words", style="yellow")

    # Add rows
    for result in results:
        table.add_row(
            str(result["source"]),
            str(result["lines"]),
            str(result["words"]),
            str(result["chars"]),
            str(result["top_words"]),
        )

    # Add total row for multiple files
    if len(results) > 1:
        table.add_row(
            "Total",
            str(total_lines),
            str(total_words),
            str(total_chars),
            "",
            style="bold",
        )

    # Print the table
    console.print(table)


# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()