-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
36 lines (23 loc) · 972 Bytes
/
main.py
File metadata and controls
36 lines (23 loc) · 972 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""MessyCorp Pandas pipeline — Task 1 through 7 runner."""
import logging
from pathlib import Path
from src.ingest import download_inputs, upload_outputs
from src.clean import load_and_explore, clean_sales
from src.transform import join_customers
from src.report import build_reports, write_outputs
# Configure the root logger once at import time: INFO and above, rendered as
# "<LEVEL> <message>".
logging.basicConfig(format="%(levelname)s %(message)s", level=logging.INFO)

# Relative directories handed to the pipeline stages in run(): DATA_DIR goes
# to the download/load steps, OUTPUT_DIR to the write/upload steps.
DATA_DIR = Path("data")
OUTPUT_DIR = Path("output")

# TODO (Task 7): replace with your GitHub username before running the pipeline.
GITHUB_USERNAME = "<your-github-username>"
def run() -> None:
    """Run every pipeline stage once, in order.

    The sequence is: ``download_inputs`` -> ``load_and_explore`` ->
    ``clean_sales`` -> ``join_customers`` -> ``build_reports`` ->
    ``write_outputs`` -> ``upload_outputs``. Inputs are read from
    ``DATA_DIR``; reports are written to and uploaded from ``OUTPUT_DIR``
    using ``GITHUB_USERNAME``. Logs "Pipeline complete." at INFO level after
    the last stage returns.
    """
    # Fetch the inputs, then load the two raw tables from the same directory.
    download_inputs(DATA_DIR)
    raw_sales, raw_customers = load_and_explore(DATA_DIR)

    # Only the sales table goes through clean_sales; the customers table is
    # passed to the join exactly as loaded.
    joined = join_customers(clean_sales(raw_sales), raw_customers)

    # Build the reports, persist them, then upload the output directory.
    write_outputs(build_reports(joined), OUTPUT_DIR)
    upload_outputs(OUTPUT_DIR, GITHUB_USERNAME)

    logging.info("Pipeline complete.")
# Run the pipeline only when this file is executed as a script; importing the
# module does not trigger run().
if __name__ == "__main__":
    run()