From 292cda1e2a44c7035816ad958382f3c6b73dc98a Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Tue, 5 Dec 2023 16:58:55 -0800 Subject: [PATCH] Update the release workflow and clean up the README for a 0.8.1 release --- .github/workflows/release.yml | 14 ++++++++++++-- README.adoc | 20 ++++++++++++++++---- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index add9e0c..9e14312 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -77,12 +77,22 @@ jobs: env: GITHUB_TOKEN: ${{ github.token }} - - name: Upload Lambda zip + - name: Upload oxbow lambda uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ github.token }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} asset_path: ./target/lambda/oxbow-lambda/bootstrap.zip - asset_name: oxbow-lambda-bootstrap.zip + asset_name: oxbow-lambda-bootstrap-${{ github.ref }}.zip + asset_content_type: application/zip + + - name: Upload group-events lambda + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ github.token }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ./target/lambda/group-events/bootstrap.zip + asset_name: group-events-lambda-bootstrap-${{ github.ref }}.zip asset_content_type: application/zip diff --git a/README.adoc b/README.adoc index 1c6dac0..6136a08 100644 --- a/README.adoc +++ b/README.adoc @@ -9,7 +9,7 @@ endif::[] = Oxbow -Oxbow is a simple project to take an existing storage location which contains +Oxbow is a project to take an existing storage location which contains link:https://parquet.apache.org[Apache Parquet] files into a link:https://delta.io[Delta Lake table]. It is intended to run both as an AWS Lambda or as a command line application. @@ -47,7 +47,7 @@ into a Delta table! 
=== Lambda -The `deployment.tf` file contains the necessary Terraform to provision the +The `deployment/` directory contains the necessary Terraform to provision the function, a DynamoDB table for locking, S3 bucket, and IAM permissions. After configuring the necessary authentication for Terraform, the following @@ -68,6 +68,18 @@ allowed. For bucket locations with massive `.parquet` files, this may need to be tuned. ==== +==== Advanced + +To help ameliorate +link:https://www.buoyantdata.com/blog/2023-11-27-concurrency-limitations-with-deltalake-on-aws.html[concurrency +challenges for Delta Lake on AWS] with the DynamoDB lock, the `deployment/` +directory also contains an "advanced" pattern which uses the `group-events` +Lambda to help serialize S3 Bucket Notifications into an AWS SQS FIFO queue with +Message Group IDs. + +To build all the necessary code locally for the Advanced pattern, please run +`make build-release`. + == Development @@ -78,7 +90,7 @@ lambda` command line tool, e.g.: [source,bash] ---- -cargo lambda build --features lambda --release --output-format zip +cargo lambda build --release --output-format zip ---- This will produce the file: `target/lambda/oxbow-lambda/bootstrap.zip` which can be @@ -102,4 +114,4 @@ a `.parquet` file is added to the bucket/prefix. == Licensing -This repository is intentionally licensed under the link:https://www.gnu.org/licenses/agpl-3.0.en.html[AGPL 3.0]. If your organization is interested in re-licensing this function for re-use, contact me via email for commercial licensing terms: `rtyler@brokenco.de` +This repository is licensed under the link:https://www.gnu.org/licenses/agpl-3.0.en.html[AGPL 3.0]. If your organization is interested in re-licensing this function for re-use, contact me via email for commercial licensing terms: `rtyler@brokenco.de`