-
Notifications
You must be signed in to change notification settings - Fork 5
151 lines (135 loc) · 8.71 KB
/
tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# End-to-end CLI tests for Tahweel.
#
# Every test step runs the `tahweel` command against fixtures in .github/data,
# asserts on the files it produces, and then deletes those files so the next
# step starts from a clean fixture directory.
name: Tests

on:
  push:
    branches:
      - main
  # NOTE(review): repository secrets are not exposed to workflows triggered
  # from forks, so these tests are expected to fail on fork pull requests.
  pull_request:

# The job only reads the repository; keep the GITHUB_TOKEN read-only.
permissions:
  contents: read

jobs:
  tests:
    name: Tests
    runs-on: ubuntu-latest
    steps:
      # docx2txt is used further down to read back the generated .docx file.
      # poppler-utils is presumably required by tahweel for PDF handling
      # (not visible in this workflow — confirm against the project docs).
      - name: Install poppler-utils and docx2txt
        run: |
          sudo apt-get update
          sudo apt-get install -y poppler-utils docx2txt

      - name: Checkout code
        uses: actions/checkout@v4

      # Poetry is installed before setup-python because `cache: 'poetry'`
      # needs the poetry executable to be available when it runs.
      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version-file: 'pyproject.toml'
          cache: 'poetry'

      - name: Install Poetry dependencies
        run: poetry install

      # Written with a plain shell step. The secret is handed over through an
      # environment variable instead of being interpolated into the script
      # text, and no third-party action ever receives it.
      - name: Write SERVICE_ACCOUNT_CREDENTIALS to service_account_credentials.json
        env:
          SERVICE_ACCOUNT_CREDENTIALS: ${{ secrets.SERVICE_ACCOUNT_CREDENTIALS }}
        run: printf '%s' "$SERVICE_ACCOUNT_CREDENTIALS" > service_account_credentials.json

      # Single file input: outputs are expected next to the input file.
      - name: Test Tahweel on 1 page PDF file
        run: |
          poetry run tahweel ".github/data/test-case-1-page.pdf" --service-account-credentials service_account_credentials.json
          test -f ".github/data/test-case-1-page.txt" || exit 1
          test -f ".github/data/test-case-1-page.docx" || exit 1
          grep -q "^بسم الله الرحمن الرحيم$" ".github/data/test-case-1-page.txt" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      # The 3-page output must contain exactly two PAGE_SEPARATOR occurrences,
      # and its first and last lines must match the expected text.
      - name: Test Tahweel on 3 pages PDF file
        run: |
          poetry run tahweel ".github/data/test-case-3-pages.pdf" --service-account-credentials service_account_credentials.json
          test -f ".github/data/test-case-3-pages.txt" || exit 1
          test -f ".github/data/test-case-3-pages.docx" || exit 1
          grep -o "PAGE_SEPARATOR" ".github/data/test-case-3-pages.txt" | wc -l | grep -q "^2$" || exit 1
          head -n 1 ".github/data/test-case-3-pages.txt" | grep -q "^بسم الله الرحمن الرحيم$" || exit 1
          tail -n 1 ".github/data/test-case-3-pages.txt" | grep -q "^والصلاة والسلام على أشرف الأنبياء والمرسلين$" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      - name: Test Tahweel on multiple PDF files
        run: |
          poetry run tahweel ".github/data/test-case-1-page.pdf" ".github/data/test-case-3-pages.pdf" --service-account-credentials service_account_credentials.json
          test -f ".github/data/test-case-1-page.txt" || exit 1
          test -f ".github/data/test-case-1-page.docx" || exit 1
          test -f ".github/data/test-case-3-pages.txt" || exit 1
          test -f ".github/data/test-case-3-pages.docx" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      # tree_to_tree: outputs are expected in sibling directories named
      # "<input dir> - Tahweel TXT" and "<input dir> - Tahweel DOCX".
      - name: Test Tahweel on a directory with --dir-output-type tree_to_tree
        run: |
          poetry run tahweel ".github/data/" --service-account-credentials service_account_credentials.json --dir-output-type tree_to_tree
          test -f ".github/data - Tahweel TXT/test-case-1-page.txt" || exit 1
          test -f ".github/data - Tahweel DOCX/test-case-1-page.docx" || exit 1
          test -f ".github/data - Tahweel TXT/test-case-3-pages.txt" || exit 1
          test -f ".github/data - Tahweel DOCX/test-case-3-pages.docx" || exit 1
          test -f ".github/data - Tahweel TXT/test-case-multilines.txt" || exit 1
          test -f ".github/data - Tahweel DOCX/test-case-multilines.docx" || exit 1
          test -f ".github/data - Tahweel TXT/test-case.txt" || exit 1
          test -f ".github/data - Tahweel DOCX/test-case.docx" || exit 1
          rm -rf ".github/data - Tahweel TXT" ".github/data - Tahweel DOCX"

      # side_by_side: outputs are expected inside the input directory itself.
      - name: Test Tahweel on a directory with --dir-output-type side_by_side
        run: |
          poetry run tahweel ".github/data/" --service-account-credentials service_account_credentials.json --dir-output-type side_by_side
          test -f ".github/data/test-case-1-page.txt" || exit 1
          test -f ".github/data/test-case-1-page.docx" || exit 1
          test -f ".github/data/test-case-3-pages.txt" || exit 1
          test -f ".github/data/test-case-3-pages.docx" || exit 1
          test -f ".github/data/test-case-multilines.txt" || exit 1
          test -f ".github/data/test-case-multilines.docx" || exit 1
          test -f ".github/data/test-case.txt" || exit 1
          test -f ".github/data/test-case.docx" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      # Same separator count check as above, with a custom separator string.
      - name: Test Tahweel on 3 pages PDF file with --txt-page-separator ANYTHING
        run: |
          poetry run tahweel ".github/data/test-case-3-pages.pdf" --service-account-credentials service_account_credentials.json --txt-page-separator ANYTHING
          test -f ".github/data/test-case-3-pages.txt" || exit 1
          test -f ".github/data/test-case-3-pages.docx" || exit 1
          grep -o "ANYTHING" ".github/data/test-case-3-pages.txt" | wc -l | grep -q "^2$" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      # The .docx is converted back to text on stdout and must contain the two
      # source lines joined into a single line.
      - name: Test Tahweel on multilines PDF file with --docx-remove-newlines
        run: |
          poetry run tahweel ".github/data/test-case-multilines.pdf" --service-account-credentials service_account_credentials.json --docx-remove-newlines
          test -f ".github/data/test-case-multilines.txt" || exit 1
          test -f ".github/data/test-case-multilines.docx" || exit 1
          docx2txt ".github/data/test-case-multilines.docx" - | grep -q "^بسم الله الرحمن الرحيم والصلاة والسلام على أشرف الأنبياء والمرسلين$" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      # Only the .txt output may exist; a .docx file is a failure here.
      - name: Test Tahweel on 1 page PDF file with --output-formats txt
        run: |
          poetry run tahweel ".github/data/test-case-1-page.pdf" --service-account-credentials service_account_credentials.json --output-formats txt
          test -f ".github/data/test-case-1-page.txt" || exit 1
          test ! -f ".github/data/test-case-1-page.docx" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx

      - name: Test Tahweel on 1 page PDF file with --output-dir ".github/custom-output-dir"
        run: |
          poetry run tahweel ".github/data/test-case-1-page.pdf" --service-account-credentials service_account_credentials.json --output-dir ".github/custom-output-dir"
          test -f ".github/custom-output-dir/test-case-1-page.txt" || exit 1
          test -f ".github/custom-output-dir/test-case-1-page.docx" || exit 1
          rm -rf .github/custom-output-dir

      # With --output-dir, the tree_to_tree directories are expected as
      # "Tahweel TXT" and "Tahweel DOCX" inside the custom output directory.
      - name: Test Tahweel on a directory with --dir-output-type tree_to_tree --output-dir ".github/custom-output-dir"
        run: |
          poetry run tahweel ".github/data/" --service-account-credentials service_account_credentials.json --dir-output-type tree_to_tree --output-dir ".github/custom-output-dir"
          test -f ".github/custom-output-dir/Tahweel TXT/test-case-1-page.txt" || exit 1
          test -f ".github/custom-output-dir/Tahweel DOCX/test-case-1-page.docx" || exit 1
          test -f ".github/custom-output-dir/Tahweel TXT/test-case-3-pages.txt" || exit 1
          test -f ".github/custom-output-dir/Tahweel DOCX/test-case-3-pages.docx" || exit 1
          test -f ".github/custom-output-dir/Tahweel TXT/test-case-multilines.txt" || exit 1
          test -f ".github/custom-output-dir/Tahweel DOCX/test-case-multilines.docx" || exit 1
          test -f ".github/custom-output-dir/Tahweel TXT/test-case.txt" || exit 1
          test -f ".github/custom-output-dir/Tahweel DOCX/test-case.docx" || exit 1
          rm -rf .github/custom-output-dir

      - name: Test Tahweel on a directory with --dir-output-type side_by_side --output-dir ".github/custom-output-dir"
        run: |
          poetry run tahweel ".github/data/" --service-account-credentials service_account_credentials.json --dir-output-type side_by_side --output-dir ".github/custom-output-dir"
          test -f ".github/custom-output-dir/test-case-1-page.txt" || exit 1
          test -f ".github/custom-output-dir/test-case-1-page.docx" || exit 1
          test -f ".github/custom-output-dir/test-case-3-pages.txt" || exit 1
          test -f ".github/custom-output-dir/test-case-3-pages.docx" || exit 1
          test -f ".github/custom-output-dir/test-case-multilines.txt" || exit 1
          test -f ".github/custom-output-dir/test-case-multilines.docx" || exit 1
          test -f ".github/custom-output-dir/test-case.txt" || exit 1
          test -f ".github/custom-output-dir/test-case.docx" || exit 1
          rm -rf .github/custom-output-dir

      - name: Test Tahweel on an image file
        run: |
          poetry run tahweel ".github/data/test-case.jpg" --service-account-credentials service_account_credentials.json
          test -f ".github/data/test-case.txt" || exit 1
          test -f ".github/data/test-case.docx" || exit 1
          rm -f .github/data/*.txt .github/data/*.docx