# pipedag.yaml
name: pipedag_tests
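# Configuration for the pydiverse.pipedag test suite: reusable database
# connections plus one pipeline instance per backend/feature combination
# exercised by pytest.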
table_store_connections:
  postgres:
    args:
      url: "postgresql://sa:[email protected]:6543/{instance_id}"
      create_database_if_not_exists: true

  mssql:
    args:
      url: "mssql+pyodbc://sa:[email protected]:1433/{instance_id}?driver=ODBC+Driver+18+for+SQL+Server&encrypt=no"
      create_database_if_not_exists: true
      strict_materialization_details: false

  ibm_db2:
    args:
      url: "db2+ibm_db://db2inst1:password@localhost:50000/testdb"
      schema_prefix: "{instance_id}_"
      create_database_if_not_exists: false

  duckdb:
    args:
      url: "duckdb:////tmp/pipedag/{instance_id}/db.duckdb"
      create_database_if_not_exists: true

  snowflake:
    args:
      url: "snowflake://{$SNOWFLAKE_USER}:{$SNOWFLAKE_PASSWORD}@{$SNOWFLAKE_ACCOUNT}/pipedag/DBO?warehouse=pipedag&role=accountadmin"
      schema_prefix: "{instance_id}_"
      create_database_if_not_exists: true
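# Note: `table_store_connections` are referenced by name from the instances
# below via `table_store_connection:`. Placeholders in the URLs are resolved
# per instance: `{instance_id}` expands to the instance's `instance_id`, and
# `{$VAR}` (as in the snowflake URL) reads the environment variable VAR, so
# credentials can stay out of the file.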
blob_store_connections:
  file:
    class: "pydiverse.pipedag.backend.blob.FileBlobStore"
    args:
      base_path: "/tmp/pipedag/blobs"

  no_blob:
    class: "pydiverse.pipedag.backend.blob.NoBlobStore"
    args:
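# FileBlobStore persists task blobs as files under `base_path`; NoBlobStore
# is for instances whose pipelines materialize no blobs (its empty `args:`
# is intentional).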
instances:
  __any__:
    network_interface: "127.0.0.1"
    auto_table:
      - "pandas.DataFrame"
      - "sqlalchemy.sql.expression.TextClause"
      - "sqlalchemy.sql.expression.Selectable"
    # Attention: For disable_kroki: false, stage and task names might be sent to the kroki_url.
    # You can self-host kroki if you like:
    # https://docs.kroki.io/kroki/setup/install/
    disable_kroki: true
    kroki_url: "https://kroki.io"
    fail_fast: true

    instance_id: pipedag_default
    table_store:
      table_store_connection: postgres
      class: "pydiverse.pipedag.backend.table.SQLTableStore"
      args:
        print_materialize: true
        print_sql: true
      hook_args:
        pandas:
          dtype_backend: "arrow"
    blob_store:
      blob_store_connection: file
    lock_manager:
      class: "pydiverse.pipedag.backend.lock.DatabaseLockManager"
    orchestration:
      class: "pydiverse.pipedag.engine.SequentialEngine"
  # Instances used by pytest

  ## Database Instances
  postgres:
    instance_id: pd_postgres
    table_store:
      table_store_connection: postgres
  postgres_unlogged:
    instance_id: pd_postgres_unlogged
    table_store:
      table_store_connection: postgres
      args:
        materialization_details:
          __any__:
            unlogged: true
      hook_args:
        pandas:
          dtype_backend: "numpy" # also test numpy backed pandas
    blob_store:
      blob_store_connection: no_blob
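  # `unlogged: true` under `materialization_details: __any__` asks the table
  # store to create PostgreSQL UNLOGGED tables (no write-ahead logging:
  # faster, but not crash-safe) for all tables of this instance.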
  mssql:
    instance_id: pd_mssql
    table_store:
      table_store_connection: mssql
      args:
        disable_pytsql: true
    blob_store:
      blob_store_connection: no_blob

  mssql_pytsql:
    instance_id: pd_mssql_pytsql
    table_store:
      table_store_connection: mssql
      args:
        disable_pytsql: false
        pytsql_isolate_top_level_statements: true
    blob_store:
      blob_store_connection: no_blob
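  # The two MSSQL instances differ in how raw SQL scripts are executed: with
  # `disable_pytsql: false`, scripts go through the pytsql library (splitting
  # top-level statements when `pytsql_isolate_top_level_statements` is set);
  # with `disable_pytsql: true`, they are executed without pytsql.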
  ibm_db2:
    instance_id: pd_ibm_db2
    stage_commit_technique: READ_VIEWS
    table_store:
      table_store_connection: ibm_db2
    blob_store:
      blob_store_connection: no_blob
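  # The DB2 and DuckDB instances use `stage_commit_technique: READ_VIEWS`
  # instead of the default schema-swapping technique: committed stages are
  # exposed through views rather than by swapping schemas, which suits
  # backends where schema swaps are expensive or unsupported.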
  ibm_db2_avoid_schema:
    instance_id: pd_ibm_db2_avoid_schema
    stage_commit_technique: READ_VIEWS
    table_store:
      table_store_connection: ibm_db2
      args:
        avoid_drop_create_schema: true
    blob_store:
      blob_store_connection: no_blob
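  # `avoid_drop_create_schema: true` keeps pipedag from issuing DROP/CREATE
  # SCHEMA statements, useful where schema creation is slow or restricted,
  # as it can be on DB2.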
  ibm_db2_materialization_details:
    instance_id: pd_ibm_db2_materialization_details
    stage_commit_technique: READ_VIEWS
    table_store:
      table_store_connection: ibm_db2
      args:
        strict_materialization_details: false
        default_materialization_details: "no_compression"
        materialization_details:
          __any__:
            compression: [ "COMPRESS YES ADAPTIVE" ]
          no_compression:
            compression: ""
          value_compression:
            compression: "VALUE COMPRESSION"
          static_compression:
            compression: "COMPRESS YES STATIC"
          adaptive_value_compression:
            compression: [ "COMPRESS YES ADAPTIVE", "VALUE COMPRESSION" ]
          table_space:
            table_space_data: "S1"
            table_space_index: "S2"
            table_space_long: "S3"
    blob_store:
      blob_store_connection: no_blob
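  # The detail names above (`no_compression`, `value_compression`, ...) are
  # labels that tables can select per materialization;
  # `default_materialization_details` picks the label used when none is
  # given. The `compression` values are DB2 CREATE TABLE clauses, and the
  # `table_space_*` keys map to the DATA/INDEX/LONG tablespaces. With
  # `strict_materialization_details: false`, unknown detail keys are
  # tolerated instead of raising an error.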
  duckdb:
    instance_id: pd_duckdb
    stage_commit_technique: READ_VIEWS
    table_store:
      table_store_connection: duckdb
    lock_manager:
      class: "pydiverse.pipedag.backend.lock.ZooKeeperLockManager"
      args:
        hosts: "localhost:2181"
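  # DuckDB and Snowflake override the DatabaseLockManager default from
  # `__any__` with ZooKeeperLockManager (a ZooKeeper server is expected at
  # `hosts`), presumably because locking through the table store database
  # itself is a poor fit for these backends.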
  snowflake:
    instance_id: pd_snowflake
    table_store:
      table_store_connection: snowflake
    lock_manager:
      class: "pydiverse.pipedag.backend.lock.ZooKeeperLockManager"
      args:
        hosts: "localhost:2181"

  ## Table Cache Instances
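  # ParquetTableCache keeps a local Parquet copy of tables next to the SQL
  # table store. The flags control its role: `store_input` caches tables read
  # by tasks, `store_output` caches tables written by tasks, and
  # `use_stored_input_as_cache` decides whether cached inputs may be served
  # instead of re-reading from the database.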
  local_table_cache:
    instance_id: local_table_cache
    table_store:
      local_table_cache:
        class: "pydiverse.pipedag.backend.table.cache.ParquetTableCache"
        args:
          base_path: "/tmp/pipedag/table_cache"

  local_table_cache_inout:
    instance_id: local_table_cache_inout
    table_store:
      local_table_cache:
        store_input: true
        store_output: true
        use_stored_input_as_cache: true
        class: "pydiverse.pipedag.backend.table.cache.ParquetTableCache"
        args:
          base_path: "/tmp/pipedag/table_cache"

  local_table_cache_inout_numpy:
    instance_id: local_table_cache_inout_numpy
    table_store:
      hook_args:
        pandas:
          dtype_backend: "numpy"
      local_table_cache:
        store_input: true
        store_output: true
        use_stored_input_as_cache: true
        class: "pydiverse.pipedag.backend.table.cache.ParquetTableCache"
        args:
          base_path: "/tmp/pipedag/table_cache"

  local_table_store:
    instance_id: local_table_store
    table_store:
      local_table_cache:
        store_input: true
        store_output: true
        use_stored_input_as_cache: false
        class: "pydiverse.pipedag.backend.table.cache.ParquetTableCache"
        args:
          base_path: "/tmp/pipedag/table_cache"
  ## Orchestration Engine Instances
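  # The default engine is SequentialEngine (see `__any__`). DaskEngine runs
  # independent tasks in parallel across `num_workers` workers; PrefectEngine
  # hands orchestration to Prefect.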
  dask_engine:
    instance_id: dask_engine
    orchestration:
      class: "pydiverse.pipedag.engine.DaskEngine"
      args:
        num_workers: 8

  prefect_engine:
    instance_id: prefect_engine
    orchestration:
      class: "pydiverse.pipedag.engine.prefect.PrefectEngine"