Skip to content

Commit

Permalink
refactor cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
samelamin committed Mar 27, 2017
1 parent 39b0071 commit b17ee16
Show file tree
Hide file tree
Showing 9 changed files with 80 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
./notebooks/
./notebooks
.DS_Store
gcp-secret
google-creds.json
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ RUN set -ex \
npm \
' \
&& apt-get update && apt-get install -y --no-install-recommends $buildDeps \
&& apt-get install vim -y \
&& curl -sL http://archive.apache.org/dist/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz \
| gunzip \
| tar x -C /tmp/ \
Expand All @@ -60,14 +61,16 @@ RUN set -ex \
ADD about.json $ZEPPELIN_NOTEBOOK_DIR/2BTRWA9EV/note.json

# Google credentials JSON
COPY gcp-credentials/ ${SPARK_HOME}/gcp-credentials/
COPY gcp-credentials/ /usr/google-credentials/

# Update conf file
COPY conf/core-site.xml ${SPARK_HOME}/conf/core-site.xml


#RUN rm ./spark/target/lib/guava-14.0.1.jar

# Copy BigQuery Connector
COPY lib/spark-bigquery-assembly-0.1.3.jar ${SPARK_HOME}/jars/spark-bigquery-assembly-0.1.3.jar
COPY lib/spark-bigquery-assembly-0.1.4.jar ${SPARK_HOME}/jars/spark-bigquery-assembly-0.1.4.jar

# Copy Jackson related jars
COPY lib/jackson-core-2.8.6.jar ${SPARK_HOME}/jars/jackson-core-2.8.6.jar
Expand Down
2 changes: 1 addition & 1 deletion conf/core-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
<configuration>
<property>
<name>fs.gs.auth.service.account.json.keyfile</name>
<value>/usr/spark-2.1.0/gcp-credentials/google-creds.json</value>
<value>/usr/google-credentials/google-creds.json</value>
<description>GCP JSON key file.</description>
</property>
<property>
Expand Down
22 changes: 3 additions & 19 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,10 @@ zeppelin:
ZEPPELIN_JAVA_OPTS: >-
-Dspark.driver.memory=1g
-Dspark.executor.memory=2g
SPARK_SUBMIT_OPTIONS: >-
--conf spark.driver.host=localhost
--conf spark.driver.port=8081
--conf spark.es.nodes=elasticsearch
--conf spark.es.discovery=false
--conf spark.es.port=9200
--packages org.elasticsearch:elasticsearch-spark-20_2.11:5.0.0-alpha5
MASTER: local[*]
ELASTICSEARCH_HOST: elasticsearch
ELASTICSEARCH_PORT: 9300
links:
- elasticsearch:elasticsearch
ports:
- 8080:8080
- 8081:8081
- 4040:4040
volumes:
- ./notebooks:/usr/zeppelin/notebook
elasticsearch:
image: elasticsearch:2.3
ports:
- 9200:9200
- 9300:9300
- ./data:/usr/zeppelin/data
- ./google-credentials:/usr/google-credentials
- ./notebooks:/usr/zeppelin/notebook
File renamed without changes.
Binary file removed lib/jackson-annotations-2.8.6.jar
Binary file not shown.
Binary file removed lib/jackson-databind-2.8.6.jar
Binary file not shown.
Binary file not shown.
69 changes: 69 additions & 0 deletions notebooks/2CF4GFVZ5/note.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"paragraphs": [
{
"text": "import com.samelamin.spark.bigquery._\n// val jsonFile \u003d \"/databricks/Justeat-platform-events-c750aee2059a.json\"\nval BQ_PROJECT_ID \u003d \"justeat-datalake\"\nval GcsBucket \u003d \"je-etl-test\"\n// sqlContext.setGcpJsonKeyFile(jsonFile)\nsqlContext.setGSProjectId(BQ_PROJECT_ID)\nsqlContext.setBigQueryProjectId(BQ_PROJECT_ID)\nsqlContext.setBigQueryGcsBucket(GcsBucket)\nval df \u003d sqlContext.bigQuerySelect(\"SELECT commit, committer.date FROM `bigquery-public-data.github_repos.commits` LIMIT 10\")",
"user": "anonymous",
"dateUpdated": "Mar 27, 2017 8:08:57 PM",
"config": {
"colWidth": 12.0,
"enabled": true,
"results": {},
"editorSetting": {
"language": "scala",
"editOnDblClick": false
},
"editorMode": "ace/mode/scala"
},
"settings": {
"params": {},
"forms": {}
},
"results": {
"code": "ERROR",
"msg": [
{
"type": "TEXT",
"data": "\nimport com.samelamin.spark.bigquery._\n\nBQ_PROJECT_ID: String \u003d justeat-datalake\n\nGcsBucket: String \u003d je-etl-test\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\ncom.google.api.client.googleapis.json.GoogleJsonResponseException: 403 Forbidden\n{\n \"code\" : 403,\n \"errors\" : [ {\n \"domain\" : \"global\",\n \"message\" : \"Access Denied: Dataset justeat-datalake:spark_bigquery_staging_us: The user [email protected] does not have bigquery.datasets.get permission for dataset justeat-datalake:spark_bigquery_staging_us.\",\n \"reason\" : \"accessDenied\"\n } ],\n \"message\" : \"Access Denied: Dataset justeat-datalake:spark_bigquery_staging_us: The user [email protected] does not have bigquery.datasets.get permission for dataset justeat-datalake:spark_bigquery_staging_us.\"\n}\n at com.google.api.client.googleapis.json.GoogleJsonResponseException.from(GoogleJsonResponseException.java:145)\n at com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:113)\n at com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:40)\n at com.google.api.client.googleapis.services.AbstractGoogleClientRequest$1.interceptResponse(AbstractGoogleClientRequest.java:321)\n at com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1056)\n at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:419)\n at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:352)\n at com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:469)\n at com.samelamin.spark.bigquery.BigQueryClient.stagingDataset(BigQueryClient.scala:153)\n at com.samelamin.spark.bigquery.BigQueryClient.com$samelamin$spark$bigquery$BigQueryClient$$temporaryTable(BigQueryClient.scala:179)\n at com.samelamin.spark.bigquery.BigQueryClient$$anon$1.load(BigQueryClient.scala:131)\n at com.samelamin.spark.bigquery.BigQueryClient$$anon$1.load(BigQueryClient.scala:126)\n at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)\n at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)\n at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)\n at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257)\n at com.google.common.cache.LocalCache.get(LocalCache.java:4000)\n at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:4004)\n at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)\n at com.samelamin.spark.bigquery.BigQueryClient.selectQuery(BigQueryClient.scala:118)\n at com.samelamin.spark.bigquery.package$BigQuerySQLContext.bigQuerySelect(package.scala:93)\n ... 48 elided\n"
}
]
},
"apps": [],
"jobName": "paragraph_1490645297785_-321247850",
"id": "20170327-200817_2069028",
"dateCreated": "Mar 27, 2017 8:08:17 PM",
"dateStarted": "Mar 27, 2017 8:08:57 PM",
"dateFinished": "Mar 27, 2017 8:09:12 PM",
"status": "ERROR",
"progressUpdateIntervalMs": 500
},
{
"user": "anonymous",
"config": {},
"settings": {
"params": {},
"forms": {}
},
"apps": [],
"jobName": "paragraph_1490645337568_259399537",
"id": "20170327-200857_735167642",
"dateCreated": "Mar 27, 2017 8:08:57 PM",
"status": "READY",
"progressUpdateIntervalMs": 500
}
],
"name": "bigquery",
"id": "2CF4GFVZ5",
"angularObjects": {
"2CDMMJ82T:shared_process": [],
"2CBBQSHSU:shared_process": [],
"2CE3BGN3J:shared_process": [],
"2CE732PCB:shared_process": [],
"2CETTYMN3:shared_process": [],
"2CCPQPPMS:shared_process": [],
"2CD5YT9JJ:shared_process": [],
"2CE5ZZ76Y:shared_process": [],
"2CF6ATRCN:shared_process": []
},
"config": {},
"info": {}
}

0 comments on commit b17ee16

Please sign in to comment.