diff --git a/marvin_python_toolbox/management/engine.py b/marvin_python_toolbox/management/engine.py
index c79a74c..4478815 100644
--- a/marvin_python_toolbox/management/engine.py
+++ b/marvin_python_toolbox/management/engine.py
@@ -320,6 +320,51 @@ def engine_server(ctx, action, params_file, metadata_file, initial_dataset, data
     server.stop(0)
 
 
+@cli.command('engine-dockerbuild', help='Build a docker image containing the engine. Requires the docker service to be running on the host machine.')
+@click.option(
+    '--type',
+    '-t',
+    type=click.Choice(['spark', 'base']),
+    default='spark',
+    help='Image type to build, e.g. "spark" for a Marvin image with Spark included.',
+)
+@click.option('--tag', '-tg', default='marvinai/marvin', help='Image name to be used; the chosen type is appended to it (e.g. marvinai/marvin-spark).')
+@click.option('--version', '-v', help='Image version to be used.')
+def build_docker(type, tag, version):
+    build_types = {
+        "spark": {
+            "folder": "marvin-spark-docker"
+        },
+        "base": {
+            "folder": "marvin-base-docker"
+        }
+    }
+    if version is None:
+        version = VERSION
+
+    print("Generating a package with the engine in order to build the docker image.")
+    command_tar = ['tar', '-cf', 'engine.tar', '.']
+    run_command(command_tar, "Failed to generate tar file.")
+
+    docker_folder = build_types[type]["folder"]
+    print("Moving the package to the docker folder.")
+    command_mv = ['mv', 'engine.tar', 'docker/{0}/'.format(docker_folder)]
+    run_command(command_mv, "Failed to move the package to the docker folder.")
+
+    print("Building docker image.")
+    tag = "{0}-{1}".format(tag, type)
+    command = ['docker', 'build', '-t', '{0}:{1}'.format(tag, version), 'docker/{0}/'.format(docker_folder)]
+    run_command(command, "Failed to build docker image.")
+
+    print("Successfully built docker image {0}:{1}. Use 'docker run' on this image to start the engine-httpserver.".format(tag, version))
+
+
+def run_command(command, error_message="A failure occurred."):
+    # Run the command and abort on failure, whether the process could not be
+    # spawned or it finished with a non-zero exit code.
+    try:
+        exit_code = subprocess.Popen(command, env=os.environ).wait()
+    except Exception:
+        logger.exception(error_message)
+        sys.exit(1)
+    if exit_code != 0:
+        logger.error(error_message)
+        sys.exit(1)
+
+
 TEMPLATE_BASES = {
     'python-engine': os.path.join(os.path.dirname(__file__), 'templates', 'python-engine')
 }
@@ -336,7 +381,6 @@ def engine_server(ctx, action, params_file, metadata_file, initial_dataset, data
     _orig_type = type
 
-
 
 @cli.command('engine-generateenv', help='Generate a new marvin engine environment and install default requirements.')
 @click.argument('engine-path', type=click.Path(exists=True))
 def generate_env(engine_path):
@@ -544,6 +588,7 @@ def _call_git_init(dest):
     default='all',
     type=click.Choice(['all', 'acquisitor', 'tpreparator', 'trainer', 'evaluator', 'ppreparator', 'predictor']),
     help='Marvin engine action name')
+@click.option('--model-protocol', '-mp', help='Model protocol to be loaded. Useful for loading a previously trained model.', type=click.Path(exists=True))
 @click.option('--initial-dataset', '-id', help='Initial dataset file path', type=click.Path(exists=True))
 @click.option('--dataset', '-d', help='Dataset file path', type=click.Path(exists=True))
 @click.option('--model', '-m', help='Engine model file path', type=click.Path(exists=True))
@@ -584,6 +629,7 @@ def engine_httpserver(ctx, action, params_file, initial_dataset, dataset,
         '-DmarvinConfig.engineHome={}'.format(ctx.obj['config']['inidir']),
         '-DmarvinConfig.ipAddress={}'.format(http_host),
         '-DmarvinConfig.port={}'.format(http_port),
+        '-DmarvinConfig.modelProtocol={}'.format(model_protocol),
         '-jar',
         executor_path])
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-base-docker/Dockerfile b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-base-docker/Dockerfile
new file mode 100644
index 0000000..a2a14fa
--- /dev/null
+++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-base-docker/Dockerfile
@@ -0,0 +1,31 @@
+FROM debian:jessie
+
+RUN echo "deb http://http.debian.net/debian jessie-backports main" >> /etc/apt/sources.list \
+    && apt-get update \
+    && apt-get install -y software-properties-common curl wget \
+    && apt-get install -yt jessie-backports openjdk-8-jdk \
+    && apt-get install -y git \
+    && apt-get -qy install python2.7-dev python-pip ipython libsasl2-dev gcc \
+    && apt-get -qy install libffi-dev \
+    && apt-get -qy install libssl-dev \
+    && apt-get -qy install libxml2-dev libxslt1-dev \
+    && apt-get -qy install libpng12-dev libfreetype6-dev \
+    && apt-get -qy install python-tk
+
+# Engines will run as the marvin user
+RUN useradd --create-home -s /bin/bash -G sudo marvin
+
+# Install virtualenvwrapper and upgrade pip
+ENV WORKON_HOME /home/marvin/.virtualenvs
+RUN pip install -q virtualenvwrapper \
+    && echo 'source /usr/local/bin/virtualenvwrapper.sh' >> /home/marvin/.profile \
+    && mkdir -p /opt/marvin/data \
+    && pip install --upgrade pip
+
+# Take ownership of the folders the engine needs
+RUN chown -R marvin:marvin /opt
+
+ENV MARVIN_DATA_PATH /opt/marvin/data
+
+USER marvin
+WORKDIR /home/marvin
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/Dockerfile b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/Dockerfile
new file mode 100644
index 0000000..ea0934f
--- /dev/null
+++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/Dockerfile
@@ -0,0 +1,29 @@
+FROM marvinai/marvin-base
+
+USER root
+
+ADD http://archive.apache.org/dist/spark/spark-2.1.1/spark-2.1.1-bin-hadoop2.6.tgz /opt/
+
+# A remote ADD only downloads the archive (it is not auto-extracted), so unpack it here
+RUN cd /opt \
+    && tar -xzf spark-2.1.1-bin-hadoop2.6.tgz \
+    && mv spark-2.1.1-bin-hadoop2.6 spark \
+    && rm spark-2.1.1-bin-hadoop2.6.tgz
+
+# Add configuration files
+ADD spark-conf/* /opt/spark/conf/
+
+ADD engine.tar /opt/engine
+
+COPY virtualenv_entrypoint.sh /opt/engine
+
+RUN chown marvin:marvin -R /opt/engine
+
+USER marvin
+
+ENV SPARK_HOME /opt/spark
+ENV HADOOP_CONF_DIR /opt/spark/conf
+ENV MARVIN_HOME /opt/engine
+
+RUN cd /opt/engine \
+    && bash -c 'source /usr/local/bin/virtualenvwrapper.sh && mkvirtualenv engine-env && setvirtualenvproject && make marvin'
+
+ENTRYPOINT "/opt/engine/virtualenv_entrypoint.sh"
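
A minimal usage sketch for the command added above, run from an engine's root directory (the image version and the published port below are illustrative assumptions, not values taken from this patch):

    # build the Spark-based image for the engine in the current directory
    marvin engine-dockerbuild --type spark --tag marvinai/marvin --version 0.0.1
    # run the resulting image; the entrypoint starts the engine-httpserver
    docker run -p 8000:8000 marvinai/marvin-spark:0.0.1
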
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/build.sh b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/build.sh
new file mode 100755
index 0000000..d98887f
--- /dev/null
+++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/build.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+if [ -z "$1" ]
+  then
+    echo "You must specify the version of the image being built"
+    exit 1
+fi
+docker build -t registry.b2w.io/b2wdigital/predictionio-b2w:"$1" .
+
+
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/core-site.xml b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/core-site.xml
new file mode 100644
index 0000000..e69de29
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/fairscheduler.xml b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/fairscheduler.xml
new file mode 100644
index 0000000..385b2e7
--- /dev/null
+++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/fairscheduler.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<allocations>
+  <pool name="production">
+    <schedulingMode>FAIR</schedulingMode>
+    <weight>1</weight>
+    <minShare>2</minShare>
+  </pool>
+  <pool name="test">
+    <schedulingMode>FIFO</schedulingMode>
+    <weight>2</weight>
+    <minShare>3</minShare>
+  </pool>
+</allocations>
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/hdfs-site.xml b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/hdfs-site.xml
new file mode 100644
index 0000000..e69de29
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/hive-site.xml b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/hive-site.xml
new file mode 100644
index 0000000..e69de29
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/log4j.properties b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/log4j.properties
new file mode 100644
index 0000000..ec1aa18
--- /dev/null
+++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/log4j.properties
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.spark_project.jetty=WARN +log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL +log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/metrics.properties b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/metrics.properties new file mode 100644 index 0000000..aeb76c9 --- /dev/null +++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/metrics.properties @@ -0,0 +1,170 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# syntax: [instance].sink|source.[name].[options]=[value] + +# This file configures Spark's internal metrics system. The metrics system is +# divided into instances which correspond to internal components. +# Each instance can be configured to report its metrics to one or more sinks. +# Accepted values for [instance] are "master", "worker", "executor", "driver", +# and "applications". A wildcard "*" can be used as an instance name, in +# which case all instances will inherit the supplied property. +# +# Within an instance, a "source" specifies a particular set of grouped metrics. +# there are two kinds of sources: +# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will +# collect a Spark component's internal state. Each instance is paired with a +# Spark source that is added automatically. +# 2. Common sources, like JvmSource, which will collect low level state. +# These can be added through configuration options and are then loaded +# using reflection. +# +# A "sink" specifies where metrics are delivered to. Each instance can be +# assigned one or more sinks. +# +# The sink|source field specifies whether the property relates to a sink or +# source. +# +# The [name] field specifies the name of source or sink. +# +# The [options] field is the specific property of this source or sink. The +# source or sink is responsible for parsing this property. +# +# Notes: +# 1. To add a new sink, set the "class" option to a fully qualified class +# name (see examples below). +# 2. Some sinks involve a polling period. The minimum allowed polling period +# is 1 second. +# 3. 
Wildcard properties can be overridden by more specific properties. +# For example, master.sink.console.period takes precedence over +# *.sink.console.period. +# 4. A metrics specific configuration +# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be +# added to Java properties using -Dspark.metrics.conf=xxx if you want to +# customize metrics system. You can also put the file in ${SPARK_HOME}/conf +# and it will be loaded automatically. +# 5. The MetricsServlet sink is added by default as a sink in the master, +# worker and driver, and you can send HTTP requests to the "/metrics/json" +# endpoint to get a snapshot of all the registered metrics in JSON format. +# For master, requests to the "/metrics/master/json" and +# "/metrics/applications/json" endpoints can be sent separately to get +# metrics snapshots of the master instance and applications. This +# MetricsServlet does not have to be configured. + +## List of available common sources and their properties. + +# org.apache.spark.metrics.source.JvmSource +# Note: Currently, JvmSource is the only available common source. +# It can be added to an instance by setting the "class" option to its +# fully qualified class name (see examples below). + +## List of available sinks and their properties. + +# org.apache.spark.metrics.sink.ConsoleSink +# Name: Default: Description: +# period 10 Poll period +# unit seconds Unit of the poll period + +# org.apache.spark.metrics.sink.CSVSink +# Name: Default: Description: +# period 10 Poll period +# unit seconds Unit of the poll period +# directory /tmp Where to store CSV files + +# org.apache.spark.metrics.sink.GangliaSink +# Name: Default: Description: +# host NONE Hostname or multicast group of the Ganglia server, +# must be set +# port NONE Port of the Ganglia server(s), must be set +# period 10 Poll period +# unit seconds Unit of the poll period +# ttl 1 TTL of messages sent by Ganglia +# dmax 0 Lifetime in seconds of metrics (0 never expired) +# mode multicast Ganglia network mode ('unicast' or 'multicast') + +# org.apache.spark.metrics.sink.JmxSink + +# org.apache.spark.metrics.sink.MetricsServlet +# Name: Default: Description: +# path VARIES* Path prefix from the web server root +# sample false Whether to show entire set of samples for histograms +# ('false' or 'true') +# +# * Default path is /metrics/json for all instances except the master. 
The +# master has two paths: +# /metrics/applications/json # App information +# /metrics/master/json # Master information + +# org.apache.spark.metrics.sink.GraphiteSink +# Name: Default: Description: +# host NONE Hostname of the Graphite server, must be set +# port NONE Port of the Graphite server, must be set +# period 10 Poll period +# unit seconds Unit of the poll period +# prefix EMPTY STRING Prefix to prepend to every metric's name +# protocol tcp Protocol ("tcp" or "udp") to use + +## Examples +# Enable JmxSink for all instances by class name +#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink + +# Enable ConsoleSink for all instances by class name +#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink + +# Polling period for the ConsoleSink +#*.sink.console.period=10 +# Unit of the polling period for the ConsoleSink +#*.sink.console.unit=seconds + +# Polling period for the ConsoleSink specific for the master instance +#master.sink.console.period=15 +# Unit of the polling period for the ConsoleSink specific for the master +# instance +#master.sink.console.unit=seconds + +# Enable CsvSink for all instances by class name +#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink + +# Polling period for the CsvSink +#*.sink.csv.period=1 +# Unit of the polling period for the CsvSink +#*.sink.csv.unit=minutes + +# Polling directory for CsvSink +#*.sink.csv.directory=/tmp/ + +# Polling period for the CsvSink specific for the worker instance +#worker.sink.csv.period=10 +# Unit of the polling period for the CsvSink specific for the worker instance +#worker.sink.csv.unit=minutes + +# Enable Slf4jSink for all instances by class name +#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink + +# Polling period for the Slf4JSink +#*.sink.slf4j.period=1 +# Unit of the polling period for the Slf4jSink +#*.sink.slf4j.unit=minutes + +# Enable JvmSource for instance master, worker, driver and executor +#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/slaves b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/slaves new file mode 100644 index 0000000..be42a63 --- /dev/null +++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/slaves @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# A Spark Worker will be started on each of the machines listed below. 
+localhost \ No newline at end of file diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/spark-defaults.conf b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/spark-defaults.conf new file mode 100644 index 0000000..19cba6e --- /dev/null +++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/spark-defaults.conf @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/spark-env.sh b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/spark-env.sh new file mode 100755 index 0000000..5c1e876 --- /dev/null +++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/spark-env.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is sourced when running various Spark programs. +# Copy it as spark-env.sh and edit that to configure Spark for your site. 
+ +# Options read when launching programs locally with +# ./bin/run-example or ./bin/spark-submit +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program +# - SPARK_CLASSPATH, default classpath entries to append + +# Options read by executors and drivers running inside the cluster +# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node +# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program +# - SPARK_CLASSPATH, default classpath entries to append +# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data +# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos + +# Options read in YARN client mode +# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files +# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2) +# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1). +# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G) +# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G) + +# Options for the daemons used in the standalone deploy mode +# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname +# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master +# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y") +# - SPARK_WORKER_CORES, to set the number of cores to use on this machine +# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g) +# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker +# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node +# - SPARK_WORKER_DIR, to set the working directory of worker processes +# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y") +# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g). +# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") +# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") +# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers + +# Generic options for the daemons used in the standalone deploy mode +# - SPARK_CONF_DIR Alternate conf dir. (Default: ${SPARK_HOME}/conf) +# - SPARK_LOG_DIR Where log files are stored. (Default: ${SPARK_HOME}/logs) +# - SPARK_PID_DIR Where the pid file is stored. (Default: /tmp) +# - SPARK_IDENT_STRING A string representing this instance of spark. (Default: $USER) +# - SPARK_NICENESS The scheduling priority for daemons. (Default: 0) +# - SPARK_NO_DAEMONIZE Run the proposed command in the foreground. It will not output a PID file. 
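
The spark-env.sh added above is the stock Spark template and contains only commented-out documentation. If a containerized engine needs resource overrides, plain shell exports can be appended to it; the values below are illustrative assumptions, not defaults shipped with this image:

    # example overrides for spark-conf/spark-env.sh
    export SPARK_EXECUTOR_CORES=2
    export SPARK_EXECUTOR_MEMORY=2G
    export SPARK_DRIVER_MEMORY=2G
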
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/yarn-site.xml b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/spark-conf/yarn-site.xml
new file mode 100644
index 0000000..e69de29
diff --git a/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/virtualenv_entrypoint.sh b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/virtualenv_entrypoint.sh
new file mode 100755
index 0000000..ddf0c4e
--- /dev/null
+++ b/marvin_python_toolbox/management/templates/python-engine/docker/marvin-spark-docker/virtualenv_entrypoint.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+/bin/bash -c "source /usr/local/bin/virtualenvwrapper.sh && workon engine-env && marvin engine-httpserver"
\ No newline at end of file
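
The entrypoint above activates the engine-env virtualenv and immediately starts the HTTP server, so the container runs non-interactively by default. For debugging, the entrypoint can be overridden to get a shell; the image tag below is an illustrative assumption:

    docker run -it --entrypoint /bin/bash marvinai/marvin-spark:0.0.1
    # inside the container, activate the engine environment manually
    source /usr/local/bin/virtualenvwrapper.sh && workon engine-env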