|
29 | 29 | "bucket_name = \"<Your S3 bucket name>\"\n",
|
30 | 30 | "bucket_prefix = \"<Your S3 bucket prefix>\"\n",
|
31 | 31 | "database_name = \"delta_dataframe\"\n",
|
| 32 | + "database_prefix = f\"{bucket_prefix}/{database_name}\"\n", |
| 33 | + "database_location = f\"s3://{bucket_name}/{database_prefix}/\"\n", |
32 | 34 | "table_name = \"products\"\n",
|
33 |
| - "table_prefix = f\"{bucket_prefix}/{database_name}/{table_name}\"\n", |
34 |
| - "table_location = f\"s3://{bucket_name}/{table_prefix}\"" |
35 |
| - ] |
36 |
| - }, |
37 |
| - { |
38 |
| - "cell_type": "markdown", |
39 |
| - "id": "5db09157", |
40 |
| - "metadata": {}, |
41 |
| - "source": [ |
42 |
| - "## Initialize SparkSession" |
43 |
| - ] |
44 |
| - }, |
45 |
| - { |
46 |
| - "cell_type": "code", |
47 |
| - "execution_count": null, |
48 |
| - "id": "9e319735", |
49 |
| - "metadata": {}, |
50 |
| - "outputs": [], |
51 |
| - "source": [ |
52 |
| - "from pyspark.sql import SparkSession\n", |
53 |
| - "spark.stop()\n", |
54 |
| - "spark = SparkSession.builder.getOrCreate()" |
| 35 | + "table_prefix = f\"{database_prefix}/{table_name}\"\n", |
| 36 | + "table_location = f\"s3://{bucket_name}/{table_prefix}/\"" |
55 | 37 | ]
|
56 | 38 | },
|
57 | 39 | {
|
|
77 | 59 | "bucket.objects.filter(Prefix=f\"{table_prefix}/\").delete()\n",
|
78 | 60 | "\n",
|
79 | 61 | "## Drop tables in Glue Data Catalog\n",
|
| 62 | + "glue = boto3.client('glue')\n", |
80 | 63 | "try:\n",
|
81 |
| - " glue = boto3.client('glue')\n", |
82 | 64 | " glue.delete_table(DatabaseName=database_name, Name=table_name)\n",
|
83 | 65 | "except glue.exceptions.EntityNotFoundException:\n",
|
84 |
| - " print(f\"{database_name}.{table_name} does not exist\")\n", |
| 66 | + " print(f\"Table {database_name}.{table_name} does not exist\")\n", |
85 | 67 | "try:\n",
|
86 |
| - " glue = boto3.client('glue')\n", |
87 | 68 | " glue.delete_table(DatabaseName=database_name, Name='testTable')\n",
|
88 | 69 | "except glue.exceptions.EntityNotFoundException:\n",
|
89 |
| - " print(f\"{database_name}.testTable does not exist\")\n" |
| 70 | + " print(f\"Table {database_name}.testTable does not exist\")\n" |
90 | 71 | ]
|
91 | 72 | },
|
92 | 73 | {
|
|
97 | 78 | "## Create Delta table with sample data"
|
98 | 79 | ]
|
99 | 80 | },
|
| 81 | + { |
| 82 | + "cell_type": "code", |
| 83 | + "execution_count": null, |
| 84 | + "id": "a2d32110", |
| 85 | + "metadata": {}, |
| 86 | + "outputs": [], |
| 87 | + "source": [ |
| 88 | + "try:\n", |
| 89 | + " glue = boto3.client('glue')\n", |
| 90 | + " res = glue.get_database(Name=database_name)\n", |
| 91 | + " print(f\"Database {database_name} exists.\")\n", |
| 92 | + " if 'LocationUri' not in res['Database']:\n", |
| 93 | + "        print(f\"Warning: Database {database_name} does not have a LocationUri set. You need to configure a location for the database.\")\n", |
| 94 | + "except glue.exceptions.EntityNotFoundException:\n", |
| 95 | + " print(f\"Database {database_name} does not exist.\")\n", |
| 96 | + "    glue.create_database(\n", |
| 97 | + " DatabaseInput={\n", |
| 98 | + " 'Name': database_name,\n", |
| 99 | + " 'LocationUri': database_location\n", |
| 100 | + " }\n", |
| 101 | + " )\n", |
| 102 | + " print(f\"Created a new database {database_name}.\")" |
| 103 | + ] |
| 104 | + }, |
100 | 105 | {
|
101 | 106 | "cell_type": "code",
|
102 | 107 | "execution_count": null,
|
|
466 | 471 | ],
|
467 | 472 | "metadata": {
|
468 | 473 | "kernelspec": {
|
469 |
| - "display_name": "Glue Spark", |
470 |
| - "language": "scala", |
471 |
| - "name": "glue_scala_kernel" |
472 |
| - }, |
473 |
| - "language_info": { |
474 |
| - "codemirror_mode": { |
475 |
| - "name": "python", |
476 |
| - "version": 3 |
477 |
| - }, |
478 |
| - "file_extension": ".py", |
479 |
| - "mimetype": "text/x-python", |
480 |
| - "name": "scala", |
481 |
| - "pygments_lexer": "python3" |
| 474 | + "display_name": "Glue PySpark", |
| 475 | + "language": "python", |
| 476 | + "name": "glue_pyspark" |
482 | 477 | },
|
483 | 478 | "toc-autonumbering": true,
|
484 | 479 | "toc-showcode": true,
|
|
0 commit comments