Skip to content

Commit 1c9740b

Browse files
committed
Remove spark.stop() and sc.stop() from notebook examples
1 parent 3c68b19 commit 1c9740b

15 files changed

+784
-1181
lines changed

examples/notebooks/delta_dataframe.ipynb

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,11 @@
2929
"bucket_name = \"<Your S3 bucket name>\"\n",
3030
"bucket_prefix = \"<Your S3 bucket prefix>\"\n",
3131
"database_name = \"delta_dataframe\"\n",
32+
"database_prefix = f\"{bucket_prefix}/{database_name}\"\n",
33+
"database_location = f\"s3://{bucket_name}/{database_prefix}/\"\n",
3234
"table_name = \"products\"\n",
33-
"table_prefix = f\"{bucket_prefix}/{database_name}/{table_name}\"\n",
34-
"table_location = f\"s3://{bucket_name}/{table_prefix}\""
35-
]
36-
},
37-
{
38-
"cell_type": "markdown",
39-
"id": "5db09157",
40-
"metadata": {},
41-
"source": [
42-
"## Initialize SparkSession"
43-
]
44-
},
45-
{
46-
"cell_type": "code",
47-
"execution_count": null,
48-
"id": "9e319735",
49-
"metadata": {},
50-
"outputs": [],
51-
"source": [
52-
"from pyspark.sql import SparkSession\n",
53-
"spark.stop()\n",
54-
"spark = SparkSession.builder.getOrCreate()"
35+
"table_prefix = f\"{database_prefix}/{table_name}\"\n",
36+
"table_location = f\"s3://{bucket_name}/{table_prefix}/\""
5537
]
5638
},
5739
{
@@ -77,16 +59,15 @@
7759
"bucket.objects.filter(Prefix=f\"{table_prefix}/\").delete()\n",
7860
"\n",
7961
"## Drop tables in Glue Data Catalog\n",
62+
"glue = boto3.client('glue')\n",
8063
"try:\n",
81-
" glue = boto3.client('glue')\n",
8264
" glue.delete_table(DatabaseName=database_name, Name=table_name)\n",
8365
"except glue.exceptions.EntityNotFoundException:\n",
84-
" print(f\"{database_name}.{table_name} does not exist\")\n",
66+
" print(f\"Table {database_name}.{table_name} does not exist\")\n",
8567
"try:\n",
86-
" glue = boto3.client('glue')\n",
8768
" glue.delete_table(DatabaseName=database_name, Name='testTable')\n",
8869
"except glue.exceptions.EntityNotFoundException:\n",
89-
" print(f\"{database_name}.testTable does not exist\")\n"
70+
" print(f\"Table {database_name}.testTable does not exist\")\n"
9071
]
9172
},
9273
{
@@ -97,6 +78,30 @@
9778
"## Create Delta table with sample data"
9879
]
9980
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": null,
84+
"id": "a2d32110",
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"try:\n",
89+
" glue = boto3.client('glue')\n",
90+
" res = glue.get_database(Name=database_name)\n",
91+
" print(f\"Database {database_name} exists.\")\n",
92+
" if 'LocationUri' not in res['Database']:\n",
93+
" print(f\"Warning: Database {database_name} does not have Location. You need to configure location in the database.\")\n",
94+
"except glue.exceptions.EntityNotFoundException:\n",
95+
" print(f\"Database {database_name} does not exist.\")\n",
96+
" glue = glue.create_database(\n",
97+
" DatabaseInput={\n",
98+
" 'Name': database_name,\n",
99+
" 'LocationUri': database_location\n",
100+
" }\n",
101+
" )\n",
102+
" print(f\"Created a new database {database_name}.\")"
103+
]
104+
},
100105
{
101106
"cell_type": "code",
102107
"execution_count": null,
@@ -466,19 +471,9 @@
466471
],
467472
"metadata": {
468473
"kernelspec": {
469-
"display_name": "Glue Spark",
470-
"language": "scala",
471-
"name": "glue_scala_kernel"
472-
},
473-
"language_info": {
474-
"codemirror_mode": {
475-
"name": "python",
476-
"version": 3
477-
},
478-
"file_extension": ".py",
479-
"mimetype": "text/x-python",
480-
"name": "scala",
481-
"pygments_lexer": "python3"
474+
"display_name": "Glue PySpark",
475+
"language": "python",
476+
"name": "glue_pyspark"
482477
},
483478
"toc-autonumbering": true,
484479
"toc-showcode": true,

examples/notebooks/delta_sql.ipynb

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -29,29 +29,11 @@
2929
"bucket_name = \"<Your S3 bucket name>\"\n",
3030
"bucket_prefix = \"<Your S3 bucket prefix>\"\n",
3131
"database_name = \"delta_sql\"\n",
32+
"database_prefix = f\"{bucket_prefix}/{database_name}\"\n",
33+
"database_location = f\"s3://{bucket_name}/{database_prefix}/\"\n",
3234
"table_name = \"products\"\n",
33-
"table_prefix = f\"{bucket_prefix}/{database_name}/{table_name}\"\n",
34-
"table_location = f\"s3://{bucket_name}/{table_prefix}\""
35-
]
36-
},
37-
{
38-
"cell_type": "markdown",
39-
"id": "52f5b844",
40-
"metadata": {},
41-
"source": [
42-
"## Initialize SparkSession"
43-
]
44-
},
45-
{
46-
"cell_type": "code",
47-
"execution_count": null,
48-
"id": "42f78b7b",
49-
"metadata": {},
50-
"outputs": [],
51-
"source": [
52-
"from pyspark.sql import SparkSession\n",
53-
"spark.stop()\n",
54-
"spark = SparkSession.builder.getOrCreate()"
35+
"table_prefix = f\"{database_prefix}/{table_name}\"\n",
36+
"table_location = f\"s3://{bucket_name}/{table_prefix}/\""
5537
]
5638
},
5739
{
@@ -444,19 +426,9 @@
444426
],
445427
"metadata": {
446428
"kernelspec": {
447-
"display_name": "Glue Spark",
448-
"language": "scala",
449-
"name": "glue_scala_kernel"
450-
},
451-
"language_info": {
452-
"codemirror_mode": {
453-
"name": "python",
454-
"version": 3
455-
},
456-
"file_extension": ".py",
457-
"mimetype": "text/x-python",
458-
"name": "scala",
459-
"pygments_lexer": "python3"
429+
"display_name": "Glue PySpark",
430+
"language": "python",
431+
"name": "glue_pyspark"
460432
}
461433
},
462434
"nbformat": 4,

0 commit comments

Comments (0)