diff --git a/.travis.yml b/.travis.yml index e501d61..902f0c8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,6 @@ dist: trusty env: matrix: - - IMAGES='teradatalabs/centos6-java8-oracle.dependants' - - IMAGES='teradatalabs/centos6-ssh-oj8.dependants' - IMAGES=$( echo teradatalabs/{dns,ubuntu-trusty-python2.6,presto-admin-devenv} ) # Prevent duplicate builds on tag pushes. @@ -22,7 +20,7 @@ before_install: - sudo pip install docker-release - docker-release --version -install: make ${IMAGES[@]} +install: travis_wait 48 make teradatalabs/mapr-hive-kerberized before_deploy: # decrypt the github deploy key diff --git a/Makefile b/Makefile index 1024abe..97bcaa0 100644 --- a/Makefile +++ b/Makefile @@ -155,7 +155,7 @@ $(IMAGE_DIRS): %: %/Dockerfile | check-links export TESTED_IMAGE=$* && \ cd test && \ docker-compose up -t 0 -d hadoop-master && \ - time docker-compose run -e EXPECTED_CAPABILITIES="`cat ../$*/capabilities.txt | tr '\n' ' '`" --rm test-runner + time docker-compose run -e EXPECTED_CAPABILITIES="`cat ../$*/capabilities.txt | tr '\n' ' '`" -e IMAGE=$* --rm test-runner # # Static pattern rule to pull docker images that are external dependencies of diff --git a/teradatalabs/mapr-base/Dockerfile b/teradatalabs/mapr-base/Dockerfile new file mode 100644 index 0000000..de73416 --- /dev/null +++ b/teradatalabs/mapr-base/Dockerfile @@ -0,0 +1,80 @@ +# Copyright 2017 Teradata +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+FROM teradatalabs/centos6-java8-oracle
+MAINTAINER Teradata Docker Team
+
+# ADD MAPR REPO
+ADD files/maprtech.repo /etc/yum.repos.d/maprtech.repo
+COPY files/id_rsa.pub /root/
+# NOTE: the steps of the RUN below are separated by '#' lines, not empty lines (Docker warns about empty continuation lines)
+RUN yum update -y \
+#
+# ... GET MapRGPG KEY
+    && rpm --import http://package.mapr.com/releases/pub/maprgpg.key \
+#
+# INSTALL UTILITY SOFTWARE (wget is needed further down to download python-meld3)
+    && yum install -y iputils openssh-server openssh-clients sudo lsof wget \
+# CONFIGURE SSH
+    && chkconfig sshd on \
+    && grep -rl '#Port 22' /etc/ssh/sshd_config | xargs sed -i 's/#Port 22/Port 22/g' \
+    && service sshd start \
+#
+# INSTALL MAPR
+    && yum install -y mapr-fileserver mapr-nfs mapr-nodemanager mapr-cldb \
+    && yum install -y mapr-zookeeper mapr-resourcemanager mapr-historyserver \
+    && yum install -y mapr-webserver mapr-gateway mapr-httpfs \
+#
+# ADD USERS AND CHANGE OWNERSHIPS
+    && adduser mapr \
+    && adduser hive \
+    && adduser hdfs \
+    && touch /home/mapr /home/hive /home/hdfs \
+    && echo "cd /home/mapr" >> /home/mapr/.bashrc \
+    && echo "cd /home/hive" >> /home/hive/.bashrc \
+    && echo "cd /home/hdfs" >> /home/hdfs/.bashrc \
+    && chown -R mapr:mapr /home/mapr /opt/mapr/httpfs \
+    && chown hive:hive /home/hive \
+    && chown hdfs:hdfs /home/hdfs \
+# CONFIGURE ZOOKEEPER'S DATA DIRECTORY
+    && rm -rf /opt/mapr/zkdata \
+    && mkdir /opt/mapr/zkdata \
+    && chmod 777 /opt/mapr/zkdata \
+    && mkdir -p /mapr \
+#
+# INSTALL PYTHON AND SUPERVISORD
+    && yum install -y python-setuptools \
+    && easy_install pip \
+    && pip install supervisor \
+    && mkdir /etc/supervisord.d/ \
+# ... AND ITS MISSING DEPENDENCY
+    && wget http://dl.fedoraproject.org/pub/epel/6/x86_64/python-meld3-0.6.7-1.el6.x86_64.rpm \
+    && rpm -ihv python-meld3-0.6.7-1.el6.x86_64.rpm \
+    && rm python-meld3-0.6.7-1.el6.x86_64.rpm \
+#
+# CLEANUP
+    && yum -y clean all && rm -rf /tmp/* /var/tmp/* \
+#
+# GENERATE SSH KEYS (the generated key and the bundled files/id_rsa.pub are both authorized for root)
+    && ssh-keygen -t rsa -b 4096 -C "automation@teradata.com" -N "" -f /root/.ssh/id_rsa \
+    && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+    && cat /root/id_rsa.pub >> /root/.ssh/authorized_keys
+
+# Copy supervisord startup script and base configs
+COPY files/startup.sh /root/startup.sh
+COPY files/supervisord.conf /etc/supervisord.conf
+COPY files/supervisord.d/bootstrap.conf /etc/supervisord.d/bootstrap.conf
+
+# Add supervisord configs in child images
+ONBUILD COPY files/supervisord.d/* /etc/supervisord.d/
diff --git a/teradatalabs/mapr-base/README.md b/teradatalabs/mapr-base/README.md new file mode 100644 index 0000000..d0cae4a --- /dev/null +++ b/teradatalabs/mapr-base/README.md @@ -0,0 +1,13 @@ +# mapr-base [![][layers-badge]][layers-link] [![][version-badge]][dockerhub-link] + +[layers-badge]: https://images.microbadger.com/badges/image/teradatalabs/mapr-base.svg +[layers-link]: https://microbadger.com/images/teradatalabs/mapr-base +[version-badge]: https://images.microbadger.com/badges/version/teradatalabs/mapr-base.svg +[dockerhub-link]: https://hub.docker.com/r/teradatalabs/mapr-base + +A Hadoop base image for creating images testing Presto, based on the MapR distro.
+ +## Oracle license + +By using this image, you accept the Oracle Binary Code License Agreement for Java SE available here: +[http://www.oracle.com/technetwork/java/javase/terms/license/index.html](http://www.oracle.com/technetwork/java/javase/terms/license/index.html) \ No newline at end of file diff --git a/teradatalabs/mapr-base/files/id_rsa.pub b/teradatalabs/mapr-base/files/id_rsa.pub new file mode 100644 index 0000000..cdd0869 --- /dev/null +++ b/teradatalabs/mapr-base/files/id_rsa.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA1PL4EwRZFy1ewBTa4a1TK+mQ4rAupOeZsiqir/su61dAGvC6pEFAa+Litj6ub6NvcBRMAdXeBtbOnQpInE7BFwKVhwU3n60Mc69SjLiozK3Oxh9sfmbJv/JdELRS5aB9x82Y0bO5fZFPFj7SxPNMugQQMEMQHW01wsa5nJR2pYLwCtu7yoD6fQ0TJEsRqWwyQTNoR19yzL6h7p/hq9SqiqCKfsHWK4+Tj0IgF7Nwz8i+BqqOq2kC9lTRuT8HalNbqVVQ6iI+ER7FgdfSZtKKX6R9SOaKQ7p0Dt6JLFibMNhjwt5EKHsgfMOsl1G8SEncDREtTng8/JLlvIhiqmWzwQ== root@d57cdb1934d1 diff --git a/teradatalabs/mapr-base/files/maprtech.repo b/teradatalabs/mapr-base/files/maprtech.repo new file mode 100644 index 0000000..bbb4997 --- /dev/null +++ b/teradatalabs/mapr-base/files/maprtech.repo @@ -0,0 +1,13 @@ +[maprtech] +name=MapR Technologies +baseurl=http://package.mapr.com/releases/v5.2.0/redhat/ +enabled=1 +gpgcheck=0 +protect=1 + +[maprecosystem] +name=MapR Technologies +baseurl=http://package.mapr.com/releases/MEP/MEP-1.0/redhat +enabled=1 +gpgcheck=0 +protect=1 diff --git a/teradatalabs/mapr-base/files/startup.sh b/teradatalabs/mapr-base/files/startup.sh new file mode 120000 index 0000000..9a9f191 --- /dev/null +++ b/teradatalabs/mapr-base/files/startup.sh @@ -0,0 +1 @@ +../../../commons/startup.sh \ No newline at end of file diff --git a/teradatalabs/mapr-base/files/supervisord.conf b/teradatalabs/mapr-base/files/supervisord.conf new file mode 120000 index 0000000..c821a2d --- /dev/null +++ b/teradatalabs/mapr-base/files/supervisord.conf @@ -0,0 +1 @@ +../../../commons/supervisord.conf \ No newline at end of file diff --git 
a/teradatalabs/mapr-base/files/supervisord.d/bootstrap.conf b/teradatalabs/mapr-base/files/supervisord.d/bootstrap.conf new file mode 120000 index 0000000..16a8759 --- /dev/null +++ b/teradatalabs/mapr-base/files/supervisord.d/bootstrap.conf @@ -0,0 +1 @@ +../../../../commons/supervisord.d/bootstrap.conf \ No newline at end of file diff --git a/teradatalabs/mapr-hive-kerberized/Dockerfile b/teradatalabs/mapr-hive-kerberized/Dockerfile new file mode 100644 index 0000000..fc2fa5e --- /dev/null +++ b/teradatalabs/mapr-hive-kerberized/Dockerfile @@ -0,0 +1,89 @@ +# Copyright 2017 Teradata +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+FROM teradatalabs/mapr-hive
+MAINTAINER Teradata Docker Team
+
+# REMOVE UNNECESSARY FILES
+RUN rm -rf /opt/mapr/conf/ssl_truststore \
+    && rm -rf /opt/mapr/conf/maprserverticket \
+    && rm -rf /opt/mapr/conf/cldb.key \
+    && rm -rf /opt/mapr/conf/ssl_keystore \
+    && rm -rf /root/bootstrap.sh /root/warden_tracker.sh \
+# ('#' lines, not empty lines, separate the steps of a RUN: Docker warns about empty continuation lines)
+# INSTALL KERBEROS
+    && yum install -y krb5-libs krb5-server krb5-workstation
+
+# ADD KERBEROS CONFIGURATION
+ADD files/bootstrap.sh /root/
+ADD files/warden_tracker.sh /root/
+ADD files/kerberos/krb5.conf /etc/krb5.conf
+ADD files/kerberos/kdc.conf /var/kerberos/krb5kdc/kdc.conf
+ADD files/kerberos/kadm5.acl /var/kerberos/krb5kdc/kadm5.acl
+ADD files/jceJars/local_policy.jar /usr/java/jdk1.8.0_102/jre/lib/security/local_policy.jar
+ADD files/jceJars/US_export_policy.jar /usr/java/jdk1.8.0_102/jre/lib/security/US_export_policy.jar
+
+# ENABLE HIVE SECURITY
+ADD files/conf/hive-site.xml /opt/mapr/hive/hive-1.2/conf/hive-site.xml
+
+# CREATE KERBEROS DATABASE
+RUN /usr/sbin/kdb5_util create -s -P password \
+    && usermod -g root hdfs \
+    && usermod -g mapr hdfs \
+# NOTE(review): both usermod calls above set the primary group of hdfs, so only 'mapr' takes effect - confirm whether -G was intended
+# CREATE MAPR AND HIVE PRINCIPALS AND KEYTABS
+    && /usr/sbin/kadmin.local -q "addprinc -randkey mapr/mycluster@LABS.TERADATA.COM" \
+    && /usr/sbin/kadmin.local -q "xst -norandkey -k /opt/mapr/conf/mapr.keytab mapr/mycluster@LABS.TERADATA.COM" \
+    && /usr/sbin/kadmin.local -q "addprinc -randkey hive/mycluster@LABS.TERADATA.COM" \
+    && /usr/sbin/kadmin.local -q "xst -norandkey -k /opt/mapr/conf/hive.keytab hive/mycluster@LABS.TERADATA.COM" \
+#
+# CREATE HDFS USER
+    && /usr/sbin/kadmin.local -q "addprinc -randkey hdfs/mycluster@LABS.TERADATA.COM" \
+    && /usr/sbin/kadmin.local -q "xst -norandkey -k /opt/mapr/conf/hdfs.keytab hdfs/mycluster@LABS.TERADATA.COM" \
+#
+# CHANGE THE PERMISSIONS AND OWNERSHIPS FOR KEYTABS
+    && chmod 644 /opt/mapr/conf/hive.keytab /opt/mapr/conf/mapr.keytab /opt/mapr/conf/hdfs.keytab \
+    && chmod 777 /root/bootstrap.sh /root/warden_tracker.sh \
+    && chown mapr:mapr /opt/mapr/conf/mapr.keytab \
+    && chown hive:hive /opt/mapr/conf/hive.keytab \
+    && chown hdfs:hdfs /opt/mapr/conf/hdfs.keytab \
+# (the sed ending this step edits env.sh in place: the first MAPR_HIVE_SERVER_LOGIN_OPTS / MAPR_HIVE_LOGIN_OPTS match each becomes hadoop.login=hybrid)
+# CREATE PRESTO PRINCIPAL AND KEYTAB
+    && /usr/sbin/kadmin.local -q "addprinc -randkey presto-server/presto-master.docker.cluster@LABS.TERADATA.COM" \
+    && /usr/sbin/kadmin.local -q "addprinc -randkey presto-client/presto-master.docker.cluster@LABS.TERADATA.COM" \
+    && /usr/sbin/kadmin.local -q "addprinc -randkey hive/presto-master.docker.cluster@LABS.TERADATA.COM" \
+    && mkdir -p /etc/presto/conf \
+    && /usr/sbin/kadmin.local -q "xst -norandkey -k /etc/presto/conf/presto-server.keytab presto-server/presto-master.docker.cluster" \
+    && /usr/sbin/kadmin.local -q "xst -norandkey -k /etc/presto/conf/presto-client.keytab presto-client/presto-master.docker.cluster" \
+    && /usr/sbin/kadmin.local -q "xst -norandkey -k /etc/presto/conf/hive-presto-master.keytab hive/presto-master.docker.cluster" \
+    && chmod 644 /etc/presto/conf/*.keytab \
+    && sed -i \
+        -e '0,/MAPR_HIVE_SERVER_LOGIN_OPTS="-Dhadoop.login=maprsasl_keytab"/ s/MAPR_HIVE_SERVER_LOGIN_OPTS="-Dhadoop.login=maprsasl_keytab"/MAPR_HIVE_SERVER_LOGIN_OPTS="-Dhadoop.login=hybrid"/' \
+        -e '0,/MAPR_HIVE_LOGIN_OPTS="-Dhadoop.login=maprsasl"/ s/MAPR_HIVE_LOGIN_OPTS="-Dhadoop.login=maprsasl"/MAPR_HIVE_LOGIN_OPTS="-Dhadoop.login=hybrid"/' /opt/mapr/conf/env.sh
+
+# CREATE SSL KEYSTORE
+RUN keytool -genkeypair \
+    -alias presto \
+    -keyalg RSA \
+    -keystore /etc/presto/conf/keystore.jks \
+    -keypass password \
+    -storepass password \
+    -dname "CN=presto-master, OU=, O=, L=, S=, C="
+RUN chmod 644 /etc/presto/conf/keystore.jks
+
+# EXPOSE KERBEROS PORTS
+EXPOSE 88
+EXPOSE 749
+
+CMD /root/startup.sh
diff --git a/teradatalabs/mapr-hive-kerberized/README.md b/teradatalabs/mapr-hive-kerberized/README.md new file mode 100644 index 0000000..d38d024 --- /dev/null +++ b/teradatalabs/mapr-hive-kerberized/README.md @@
-0,0 +1,23 @@ +# mapr-hive-kerberized [![][layers-badge]][layers-link] [![][version-badge]][dockerhub-link] + +[layers-badge]: https://images.microbadger.com/badges/image/teradatalabs/mapr-hive-kerberized.svg +[layers-link]: https://microbadger.com/images/teradatalabs/mapr-hive-kerberized +[version-badge]: https://images.microbadger.com/badges/version/teradatalabs/mapr-hive-kerberized.svg +[dockerhub-link]: https://hub.docker.com/r/teradatalabs/mapr-hive-kerberized + + +Docker image with MapR FS, YARN and HIVE installed in a kerberized environment. Please note that running services have lower memory heap size set. +For more details please check hadoop-env.sh(configuration) file. +If you want to work on larger datasets please tune those settings accordingly, the current settings should be optimal +for general correctness testing. + +## Run + +``` +$ docker run --privileged -d --name hadoop-master -h hadoop-master teradatalabs/mapr-hive-kerberized +``` + +## Oracle license + +By using this image, you accept the Oracle Binary Code License Agreement for Java SE available here: +[http://www.oracle.com/technetwork/java/javase/terms/license/index.html](http://www.oracle.com/technetwork/java/javase/terms/license/index.html)
diff --git a/teradatalabs/mapr-hive-kerberized/files/bootstrap.sh b/teradatalabs/mapr-hive-kerberized/files/bootstrap.sh
new file mode 100644
index 0000000..5553f30
--- /dev/null
+++ b/teradatalabs/mapr-hive-kerberized/files/bootstrap.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+#
+# Container bootstrap for the kerberized MapR Hive image: configures MapR and
+# Hive, enables MapR security, starts the Kerberos and MapR services, waits for
+# them and creates the directories listed at the end in the cluster file system.
+
+# START SSHD AND THE SOCKS PROXY FOR THE HIVE METASTORE
+supervisorctl start sshd
+supervisorctl start socks-proxy
+
+# CONFIGURE MAPR
+/opt/mapr/server/configure.sh -N mycluster -Z localhost -C localhost -HS localhost -no-autostart
+
+# SETUP DISK FOR MAPR BY RUNNING disksetup
+/opt/mapr/server/disksetup -M -F /root/disk.txt
+
+# CREATE HIVE PROXY USERS
+chmod 755 /opt/mapr/conf/proxy
+
+# CONFIGURE HIVE
+/opt/mapr/server/configure.sh -R
+
+# ENABLE SECURITY IN MAPR
+/opt/mapr/server/configure.sh -secure -genkeys -C localhost -Z localhost -N mycluster -no-autostart
+
+# START KERBEROS SERVICES
+/sbin/service krb5kdc start
+/sbin/service kadmin start
+
+# START MAPR SERVICES
+service mapr-zookeeper start
+service mapr-warden start
+
+# WAIT FOR WARDEN TO START ALL THE SERVICES
+/root/warden_tracker.sh
+
+# START HTTPFS SERVICES
+maprcli node services -name httpfs -action start -nodes "$(hostname)"
+
+# RUN HDFS COMMANDS
+# -p creates missing parent directories and does not fail on directories that
+# already exist, so the command no longer depends on the order of its arguments.
+hadoop fs -mkdir -p /user /user/root /user/hive /user/hdfs /user/hive/warehouse /var /var/mapr /var/mapr/cluster /var/mapr/cluster/yarn /var/mapr/cluster/yarn/rm /var/mapr/cluster/yarn/rm/staging /var/mapr/cluster/yarn/rm/staging/hive
+hadoop fs -chmod 777 /user/hive /user/hdfs /user/hive/warehouse /var/mapr /var/mapr/cluster/yarn/rm/staging/hive
diff --git a/teradatalabs/mapr-hive-kerberized/files/conf/hive-site.xml b/teradatalabs/mapr-hive-kerberized/files/conf/hive-site.xml new file mode 100644 index 0000000..beeeebd --- /dev/null +++ b/teradatalabs/mapr-hive-kerberized/files/conf/hive-site.xml @@ -0,0 +1,94 @@ + + + + + + + datanucleus.schema.autoCreateAll + true + creates necessary schema on a startup if one doesn't exist. set + this to false, after creating it once + + + + hive.server2.enable.doAs + true + Set this property to enable impersonation in Hive Server 2 + + + + hive.metastore.execute.setugi + true + Set this property to enable Hive Metastore service impersonation in unsecure mode. In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using the client's reported user and group permissions. Note that this property must be set on both the client and server sides. If the client sets it to true and the server sets it to false, the client setting will be ignored.
+ + + + javax.jdo.option.ConnectionURL + jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true + JDBC connect string for a JDBC metastore + + + + javax.jdo.option.ConnectionDriverName + com.mysql.jdbc.Driver + Driver class name for a JDBC metastore + + + + javax.jdo.option.ConnectionUserName + root + username to use against metastore database + + + + javax.jdo.option.ConnectionPassword + root + password to use against metastore database + + + + hive.metastore.uris + thrift://localhost:9083 + + + + + + hive.metastore.kerberos.keytab.file + /opt/mapr/conf/hive.keytab + The path to the Kerberos Keytab file containing the metastore thrift server's service principal. + + + hive.metastore.kerberos.principal + hive/mycluster@LABS.TERADATA.COM + The service principal for the metastore thrift server. The special string _HOST will be replaced automatically with the correct hostname. + + + hive.server2.authentication + KERBEROS + authenticationtype + + + hive.server2.authentication.kerberos.principal + hive/mycluster@LABS.TERADATA.COM + HiveServer2 principal. If _HOST is used as the FQDN portion, it will be replaced with the actual hostname of the running instance. 
+ + + hive.server2.authentication.kerberos.keytab + /opt/mapr/conf/hive.keytab + Keytab file for HiveServer2 principal + + diff --git a/teradatalabs/mapr-hive-kerberized/files/jceJars/US_export_policy.jar b/teradatalabs/mapr-hive-kerberized/files/jceJars/US_export_policy.jar new file mode 100644 index 0000000..251b102 Binary files /dev/null and b/teradatalabs/mapr-hive-kerberized/files/jceJars/US_export_policy.jar differ diff --git a/teradatalabs/mapr-hive-kerberized/files/jceJars/local_policy.jar b/teradatalabs/mapr-hive-kerberized/files/jceJars/local_policy.jar new file mode 100644 index 0000000..1c58939 Binary files /dev/null and b/teradatalabs/mapr-hive-kerberized/files/jceJars/local_policy.jar differ diff --git a/teradatalabs/mapr-hive-kerberized/files/kerberos/kadm5.acl b/teradatalabs/mapr-hive-kerberized/files/kerberos/kadm5.acl new file mode 100644 index 0000000..0530526 --- /dev/null +++ b/teradatalabs/mapr-hive-kerberized/files/kerberos/kadm5.acl @@ -0,0 +1 @@ +*/admin@LABS.TERADATA.COM * diff --git a/teradatalabs/mapr-hive-kerberized/files/kerberos/kdc.conf b/teradatalabs/mapr-hive-kerberized/files/kerberos/kdc.conf new file mode 100644 index 0000000..c864897 --- /dev/null +++ b/teradatalabs/mapr-hive-kerberized/files/kerberos/kdc.conf @@ -0,0 +1,12 @@ +[kdcdefaults] + kdc_ports = 88 + kdc_tcp_ports = 88 + +[realms] + LABS.TERADATA.COM = { + #master_key_type = aes256-cts + acl_file = /var/kerberos/krb5kdc/kadm5.acl + dict_file = /usr/share/dict/words + admin_keytab = /var/kerberos/krb5kdc/kadm5.keytab + supported_enctypes = aes256-cts:normal aes128-cts:normal des3-hmac-sha1:normal arcfour-hmac:normal des-hmac-sha1:normal des-cbc-md5:normal des-cbc-crc:normal + } diff --git a/teradatalabs/mapr-hive-kerberized/files/kerberos/krb5.conf b/teradatalabs/mapr-hive-kerberized/files/kerberos/krb5.conf new file mode 100644 index 0000000..344e348 --- /dev/null +++ b/teradatalabs/mapr-hive-kerberized/files/kerberos/krb5.conf @@ -0,0 +1,18 @@ +[logging] +default = 
FILE:/var/log/krb5libs.log +kdc = FILE:/var/log/krb5kdc.log +admin_server = FILE:/var/log/kadmind.log + +[libdefaults] +default_realm = LABS.TERADATA.COM +dns_lookup_realm = false +dns_lookup_kdc = false +ticket_lifetime = 24h +renew_lifetime = 7d +forwardable = true + +[realms] +LABS.TERADATA.COM = { +kdc = hadoop-master +admin_server = hadoop-master +}
diff --git a/teradatalabs/mapr-hive-kerberized/files/warden_tracker.sh b/teradatalabs/mapr-hive-kerberized/files/warden_tracker.sh
new file mode 100644
index 0000000..78e1ef2
--- /dev/null
+++ b/teradatalabs/mapr-hive-kerberized/files/warden_tracker.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Waits until maprcli responds and the MapR services named at the bottom are
+# reported by `maprcli service list`. While waiting for maprcli it also obtains
+# a Kerberos ticket and a MapR ticket for mapr/mycluster.
+
+HOSTNAME=$(hostname)
+# Exit status of the last maprlogin attempt; non-zero means "not logged in yet".
+MAPR_LOGIN_READY=1
+# Number of 'ERROR (10009)' lines printed by maprcli; 0 means maprcli answered.
+MAPR_CLI_READY=$(maprcli service list -node "$HOSTNAME" | grep -c 'ERROR (10009)')
+
+# CHECK RUNNING STATUS OF GIVEN SERVICES USING MAPRCLI
+# $1 - service name to look for in the `maprcli service list` output.
+# Polls until the last whitespace-separated field of the last matching line is 2.
+# NOTE(review): assumes that field is the service state and that 2 means "running" - confirm against the maprcli docs.
+function exposes_mapr_services {
+  SERVICES=0
+  # String comparison on purpose: an empty or non-numeric result (service not
+  # listed yet) must keep the loop waiting instead of making `[` fail, which
+  # would end the loop as if the service were up.
+  while [ "$SERVICES" != "2" ]
+  do
+    SERVICES=$(maprcli service list -node "$HOSTNAME" | grep "$1" | awk '{$1=$1};1' | tr ' ' '\n' | tail -n 1)
+    # Pause between polls instead of spawning maprcli back to back.
+    [ "$SERVICES" = "2" ] || sleep 1
+  done
+}
+
+# WAIT FOR MAPRCLI (any number of error lines means "not ready yet")
+while [ "$MAPR_CLI_READY" -ne 0 ]
+do
+  MAPR_CLI_READY=$(maprcli service list -node "$HOSTNAME" | grep -c 'ERROR (10009)')
+  if [ "$MAPR_LOGIN_READY" -ne 0 ]
+  then
+    # CREATE KERBEROS TICKET
+    kinit -kt /opt/mapr/conf/mapr.keytab mapr/mycluster@LABS.TERADATA.COM
+
+    # CREATE MAPR TICKET
+    maprlogin kerberos -user mapr/mycluster@LABS.TERADATA.COM
+    MAPR_LOGIN_READY=$?
+  fi
+  [ "$MAPR_CLI_READY" -eq 0 ] || sleep 1
+done
+
+# WAIT FOR MAPR SERVICES TO START
+exposes_mapr_services CLDB
+exposes_mapr_services NodeManager
+exposes_mapr_services ResourceManager
+exposes_mapr_services HiveMetastore
+exposes_mapr_services HiveServer2
+exposes_mapr_services JobHistoryServer
diff --git a/teradatalabs/mapr-hive/Dockerfile b/teradatalabs/mapr-hive/Dockerfile new file mode 100644 index 0000000..b29a83d --- /dev/null +++ b/teradatalabs/mapr-hive/Dockerfile @@ -0,0 +1,56 @@ +# Copyright 2017 Teradata +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+
+FROM teradatalabs/mapr-base
+MAINTAINER Teradata Docker Team
+
+# ADD ALL REQUIRED SCRIPTS AND FILES TO ROOT DIRECTORY
+ADD files/setup.sh /root/
+ADD files/warden_tracker.sh /root/
+ADD files/bootstrap.sh /root/
+ADD files/socks-proxy.sh /root/
+ADD files/conf/hive-site.xml /opt/mapr/hive/hive-1.2/conf/hive-site.xml
+ADD files/conf/core-site.xml /opt/mapr/hadoop/hadoop-2.7.0/etc/hadoop/core-site.xml
+COPY files/supervisord.d/mysql-metastore.conf /etc/supervisord.d/
+COPY files/supervisord.d/socks-proxy.conf /etc/supervisord.d/
+COPY files/supervisord.d/sshd.conf /etc/supervisord.d/
+# NOTE: the steps of the RUN below are separated by '#' lines, not empty lines (Docker warns about empty continuation lines)
+RUN chmod 777 /root/setup.sh /root/warden_tracker.sh /root/bootstrap.sh /root/socks-proxy.sh /root/startup.sh \
+#
+# INSTALL MYSQL SERVER
+    && yum install -y mysql-server mysql-connector-java \
+    && /root/setup.sh \
+#
+# INSTALL MAPR HIVE
+    && yum install -y mapr-hive mapr-hiveserver2 mapr-hivemetastore \
+    && yum -y clean all && rm -rf /tmp/* /var/tmp/* \
+#
+# SETUP FLAT FILE /home/mapr/storagefile (30 blocks of 250M, i.e. 7500M of zeros)
+    && dd if=/dev/zero of=/home/mapr/storagefile bs=250M count=30 \
+# THE /root/disk.txt IS READ BY MapR TO DETERMINE WHICH DISKS IT CAN USE AND THE /home/mapr/storagefile IS A FLAT FILE THAT ACTS AS A DISK
+    && echo "/home/mapr/storagefile" > /root/disk.txt
+
+# HDFS PORTS
+EXPOSE 5660 6660 5692 5724 5756 6692 6724 6756
+
+# YARN PORTS
+EXPOSE 8030 8020 7222 8088 7220 7221 8443 8031 8032 8033 8040 8041 8042 10020 19888
+
+# HIVE PORT
+EXPOSE 9083 10000 22
+
+# SOCKS PORT
+EXPOSE 1180
+
+CMD /root/startup.sh
diff --git a/teradatalabs/mapr-hive/README.md b/teradatalabs/mapr-hive/README.md new file mode 100644 index 0000000..f82e4da --- /dev/null +++ b/teradatalabs/mapr-hive/README.md @@ -0,0 +1,22 @@ +# mapr-hive [![][layers-badge]][layers-link] [![][version-badge]][dockerhub-link] + +[layers-badge]: https://images.microbadger.com/badges/image/teradatalabs/mapr-hive.svg +[layers-link]: https://microbadger.com/images/teradatalabs/mapr-hive +[version-badge]:
https://images.microbadger.com/badges/version/teradatalabs/mapr-hive.svg +[dockerhub-link]: https://hub.docker.com/r/teradatalabs/mapr-hive + +Docker image with MapR FS, YARN and HIVE installed. Please note that running services have lower memory heap size set. +For more details please check hadoop-env.sh(configuration) file. +If you want to work on larger datasets please tune those settings accordingly, the current settings should be optimal +for general correctness testing. + +## Run + +``` +$ docker run --privileged -d --name hadoop-master -h hadoop-master teradatalabs/mapr-hive +``` + +## Oracle license + +By using this image, you accept the Oracle Binary Code License Agreement for Java SE available here: +[http://www.oracle.com/technetwork/java/javase/terms/license/index.html](http://www.oracle.com/technetwork/java/javase/terms/license/index.html) diff --git a/teradatalabs/mapr-hive/capabilities.txt b/teradatalabs/mapr-hive/capabilities.txt new file mode 100644 index 0000000..9d44a71 --- /dev/null +++ b/teradatalabs/mapr-hive/capabilities.txt @@ -0,0 +1,4 @@ +exposes_mapr +allows_creating_a_table_in_hive +allows_selecting_from_the_table +exposes_socks_proxy
diff --git a/teradatalabs/mapr-hive/files/bootstrap.sh b/teradatalabs/mapr-hive/files/bootstrap.sh
new file mode 100644
index 0000000..b460bd7
--- /dev/null
+++ b/teradatalabs/mapr-hive/files/bootstrap.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+#
+# Container bootstrap for the MapR Hive image: configures MapR, starts the MapR
+# services, configures Hive, waits for the services and creates the directories
+# listed at the end in the cluster file system.
+
+# START SSHD AND THE SOCKS PROXY FOR THE HIVE METASTORE
+supervisorctl start sshd
+supervisorctl start socks-proxy
+
+# CONFIGURE MAPR
+/opt/mapr/server/configure.sh -N mycluster -Z localhost -C localhost -HS localhost -no-autostart
+
+# SETUP DISK FOR MAPR BY RUNNING DISKSETUP
+/opt/mapr/server/disksetup -M -F /root/disk.txt
+
+# CREATE HIVE PROXY USERS
+chmod 755 /opt/mapr/conf/proxy
+
+# START SERVICES
+service mapr-zookeeper start
+service mapr-warden start
+
+# CONFIGURE HIVE
+/opt/mapr/server/configure.sh -R
+
+# WAIT FOR WARDEN TO START ALL THE SERVICES
+/root/warden_tracker.sh
+
+# START HTTPFS SERVICES
+maprcli node services -name httpfs -action start -nodes "$(hostname)"
+
+# RUN HDFS COMMANDS
+# -p creates missing parents (/user itself is not in the list) and does not
+# fail on directories that already exist.
+hadoop fs -mkdir -p /user/root /user/hive /user/hdfs /user/hive/warehouse /var /var/mapr /var/mapr/cluster /var/mapr/cluster/yarn /var/mapr/cluster/yarn/rm /var/mapr/cluster/yarn/rm/staging /var/mapr/cluster/yarn/rm/staging/hive
+hadoop fs -chmod 777 /user/hive /user/hdfs /user/hive/warehouse /var/mapr /var/mapr/cluster/yarn/rm/staging/hive
diff --git a/teradatalabs/mapr-hive/files/conf/core-site.xml b/teradatalabs/mapr-hive/files/conf/core-site.xml new file mode 100644 index 0000000..6549e0b --- /dev/null +++ b/teradatalabs/mapr-hive/files/conf/core-site.xml @@ -0,0 +1,33 @@ + + + + + + + + + hadoop.proxyuser.mapr.groups + * + Allow the superuser mapr to impersonate any member of any group + + + + hadoop.proxyuser.mapr.hosts + * + The superuser can connect from any host to impersonate a user + + + + diff --git a/teradatalabs/mapr-hive/files/conf/hive-site.xml b/teradatalabs/mapr-hive/files/conf/hive-site.xml new file mode 100644 index 0000000..3518b61 --- /dev/null +++ b/teradatalabs/mapr-hive/files/conf/hive-site.xml @@ -0,0 +1,67 @@ + + + + + + + datanucleus.schema.autoCreateAll + true + creates necessary schema on a startup if one doesn't exist. set + this to false, after creating it once + + + + hive.server2.enable.doAs + true + Set this property to enable impersonation in Hive Server 2 + + + + hive.metastore.execute.setugi + true + Set this property to enable Hive Metastore service impersonation in unsecure mode. In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using the client's reported user and group permissions. Note that this property must be set on both the client and server sides. If the client sets it to true and the server sets it to false, the client setting will be ignored.
+ + + + javax.jdo.option.ConnectionURL + jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true + JDBC connect string for a JDBC metastore + + + + javax.jdo.option.ConnectionDriverName + com.mysql.jdbc.Driver + Driver class name for a JDBC metastore + + + + javax.jdo.option.ConnectionUserName + root + username to use against metastore database + + + + javax.jdo.option.ConnectionPassword + root + password to use against metastore database + + + + hive.metastore.uris + thrift://localhost:9083 + + +
diff --git a/teradatalabs/mapr-hive/files/setup.sh b/teradatalabs/mapr-hive/files/setup.sh
new file mode 100644
index 0000000..2781708
--- /dev/null
+++ b/teradatalabs/mapr-hive/files/setup.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# Build-time initialisation of the MySQL server used by the Hive metastore:
+# creates the MySQL data directory, grants remote root access, loads a schema,
+# sets the root password to 'root' and shuts the server down again.
+
+# SETUP METASTORE
+mysql_install_db
+
+/usr/bin/mysqld_safe &
+# Wait (up to about 60s) until the server answers instead of assuming that a
+# fixed 10s sleep is enough.
+for attempt in $(seq 1 60); do
+  /usr/bin/mysqladmin ping > /dev/null 2>&1 && break
+  sleep 1
+done
+if ! /usr/bin/mysqladmin ping > /dev/null 2>&1; then
+  echo "mysqld did not start" >&2
+  exit 1
+fi
+
+echo "GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' WITH GRANT OPTION; FLUSH PRIVILEGES;" | mysql
+# NOTE(review): hive-site.xml of this image points at database 'hive' (createDatabaseIfNotExist=true),
+# not 'metastore', and nothing visible here creates /usr/lib/hive - confirm this line is still needed.
+echo "CREATE DATABASE metastore; USE metastore; SOURCE /usr/lib/hive/scripts/metastore/upgrade/mysql/hive-schema-0.13.0.mysql.sql;" | mysql
+/usr/bin/mysqladmin -u root password 'root'
+
+# Ask the server to shut down cleanly (the root password is 'root' from here on)
+# and wait for the background mysqld_safe to exit, instead of killall + sleep.
+/usr/bin/mysqladmin -u root -proot shutdown
+wait
+mkdir -p /var/log/mysql/
+chown mysql:mysql /var/log/mysql/
diff --git a/teradatalabs/mapr-hive/files/socks-proxy.sh b/teradatalabs/mapr-hive/files/socks-proxy.sh new file mode 120000 index 0000000..852d770 --- /dev/null +++ b/teradatalabs/mapr-hive/files/socks-proxy.sh @@ -0,0 +1 @@ +../../../commons/socks-proxy.sh \ No newline at end of file diff --git a/teradatalabs/mapr-hive/files/supervisord.d/mysql-metastore.conf b/teradatalabs/mapr-hive/files/supervisord.d/mysql-metastore.conf new file mode 120000 index 0000000..b48213c --- /dev/null +++ b/teradatalabs/mapr-hive/files/supervisord.d/mysql-metastore.conf @@ -0,0 +1 @@ +../../../../commons/supervisord.d/mysql-metastore.conf \ No newline at end of file diff --git a/teradatalabs/mapr-hive/files/supervisord.d/socks-proxy.conf b/teradatalabs/mapr-hive/files/supervisord.d/socks-proxy.conf new file mode 120000 index
0000000..905e977 --- /dev/null +++ b/teradatalabs/mapr-hive/files/supervisord.d/socks-proxy.conf @@ -0,0 +1 @@ +../../../../commons/supervisord.d/socks-proxy.conf \ No newline at end of file diff --git a/teradatalabs/mapr-hive/files/supervisord.d/sshd.conf b/teradatalabs/mapr-hive/files/supervisord.d/sshd.conf new file mode 120000 index 0000000..31d04a8 --- /dev/null +++ b/teradatalabs/mapr-hive/files/supervisord.d/sshd.conf @@ -0,0 +1 @@ +../../../../commons/supervisord.d/sshd.conf \ No newline at end of file
diff --git a/teradatalabs/mapr-hive/files/warden_tracker.sh b/teradatalabs/mapr-hive/files/warden_tracker.sh
new file mode 100644
index 0000000..4cab14b
--- /dev/null
+++ b/teradatalabs/mapr-hive/files/warden_tracker.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Waits until maprcli responds and the MapR services named at the bottom are
+# reported by `maprcli service list`.
+
+HOSTNAME=$(hostname)
+# Number of 'ERROR (10009)' lines printed by maprcli; 0 means maprcli answered.
+MAPR_CLI_READY=$(maprcli service list -node "$HOSTNAME" | grep -c 'ERROR (10009)')
+
+# CHECK RUNNING STATUS OF GIVEN SERVICES USING MAPRCLI
+# $1 - service name to look for in the `maprcli service list` output.
+# Polls until the last whitespace-separated field of the last matching line is 2.
+# NOTE(review): assumes that field is the service state and that 2 means "running" - confirm against the maprcli docs.
+function exposes_mapr_services {
+  SERVICES=0
+  # String comparison on purpose: an empty or non-numeric result (service not
+  # listed yet) must keep the loop waiting instead of making `[` fail, which
+  # would end the loop as if the service were up.
+  while [ "$SERVICES" != "2" ]
+  do
+    SERVICES=$(maprcli service list -node "$HOSTNAME" | grep "$1" | awk '{$1=$1};1' | tr ' ' '\n' | tail -n 1)
+    # Pause between polls instead of spawning maprcli back to back.
+    [ "$SERVICES" = "2" ] || sleep 1
+  done
+}
+
+# WAIT FOR MAPRCLI (any number of error lines means "not ready yet")
+while [ "$MAPR_CLI_READY" -ne 0 ]
+do
+  sleep 1
+  MAPR_CLI_READY=$(maprcli service list -node "$HOSTNAME" | grep -c 'ERROR (10009)')
+done
+
+# WAIT FOR MAPR SERVICES TO START
+exposes_mapr_services CLDB
+exposes_mapr_services NodeManager
+exposes_mapr_services ResourceManager
+exposes_mapr_services HiveMetastore
+exposes_mapr_services HiveServer2
+exposes_mapr_services JobHistoryServer
diff --git a/test/Dockerfile b/test/Dockerfile index 17dfffe..89a0168 100644 --- a/test/Dockerfile +++ b/test/Dockerfile @@ -15,12 +15,22 @@ FROM teradatalabs/centos6-java8-oracle ENV DOCKERIZE_VERSION v0.3.0 - +RUN mkdir /root/.ssh +COPY /files/conf/* /root/.ssh/ +COPY files/* /root/ RUN \ yum install -y \ wget \ unzip \ +# INSTALL UTILITY SOFTWARE + && yum install -y openssh-server openssh-clients \ + +# CONFIGURE SSH + &&
chkconfig sshd on \ + && grep -rl '#Port 22' /etc/ssh/sshd_config | xargs sed -i 's/#Port 22/Port 22/g' \ + && service sshd start \ + # setup CDH repo, pin the CDH distribution to a concrete version && wget -nv http://archive.cloudera.com/cdh5/one-click-install/redhat/6/x86_64/cloudera-cdh-5-0.x86_64.rpm \ && yum --nogpgcheck localinstall -y cloudera-cdh-5-0.x86_64.rpm \ @@ -45,7 +55,9 @@ RUN \ && yum -y clean all && rm -rf /tmp/* /var/tmp/* \ # create 'image_tests' volume mount path - && mkdir /image_tests + && mkdir /image_tests \ + && chmod 777 /root/*.sh \ + && chmod 600 /root/.ssh/id_rsa.pub /root/.ssh/id_rsa VOLUME /image_tests diff --git a/test/docker-compose.yml b/test/docker-compose.yml index 2e04ebe..1908301 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -4,6 +4,7 @@ services: hadoop-master: image: ${TESTED_IMAGE} hostname: hadoop-master + privileged: true ports: - '1180' diff --git a/test/files/conf/id_rsa b/test/files/conf/id_rsa new file mode 100644 index 0000000..2d01c1c --- /dev/null +++ b/test/files/conf/id_rsa @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEogIBAAKCAQEA1PL4EwRZFy1ewBTa4a1TK+mQ4rAupOeZsiqir/su61dAGvC6 +pEFAa+Litj6ub6NvcBRMAdXeBtbOnQpInE7BFwKVhwU3n60Mc69SjLiozK3Oxh9s +fmbJv/JdELRS5aB9x82Y0bO5fZFPFj7SxPNMugQQMEMQHW01wsa5nJR2pYLwCtu7 +yoD6fQ0TJEsRqWwyQTNoR19yzL6h7p/hq9SqiqCKfsHWK4+Tj0IgF7Nwz8i+BqqO +q2kC9lTRuT8HalNbqVVQ6iI+ER7FgdfSZtKKX6R9SOaKQ7p0Dt6JLFibMNhjwt5E +KHsgfMOsl1G8SEncDREtTng8/JLlvIhiqmWzwQIBIwKCAQAYVkg8sAouiNeg7HDJ +VaMa9h8v2Z7uRlq1RrOB05BVaQ6j/kE3V+oa9Vu9DnpV5sroPNV8jXh1z2gR8ov0 +m0lEdVLq3AZbZDvv9sebDcomBTwz5lWKyesr4S81G+w3gA5f+j1Zy2Wn81l+4pu+ +vLhPxfM4tzUKrWU61OIDRCrRFhHaFiDxBMsMhOSBfGT5OuIIHOAcjvB+YxKgB644 +ANoOnnExaukOOX0ZLgnwIG7YG0Cf6MVJpbK+gx7ER85SnMV/wjzsAedOfF+tD8Z1 +P0nfRXKr9X+CE2m3JMAb4QxXphsVHLAtGjPmzEz/2JNu7mnevJW1iQ0X19giSdyE +0+U7AoGBAPgsgpFrGKf0vy3XKMkIl8uwn+GGKob3/Fc6sGYaGu2fgR/BeMnKf4IT +BsDBgPJPz/ozQaWXWVBCZc5Wqm7CPCiccHzUF8G0PZFMNkR2ayO854co82Sj+sWW 
+XwS/kdTRRQ6HHr67KKSZFZO60HaWWSJ9Z0IlrVe+k3s0FtMV72pXAoGBANuqF4oi +fmHawxXgIdUcESzJlt3HUjOC32e7rEcnskjrNQxM4ZUkmQrf6IhhFrSdCk5cNCrP +pdaw87YtIEtVO3LvAT8E+dRuIO8LV4VufaBBHUq/TwntldF1/fN41ZpgpehDBH/i +rTXAMncd2junSpsyt800W7KtXsdca0YKODOnAoGAHFzbuNkKIdLUBT0pO4v0F0di +urA/X+HTsjKX0SeODIdB9QArD79fB40lV9uZtUrzMomD2GkRhYPuYLlykFC2awqQ +g0twbei2lEM5Zuj2TTopFsLZ/N+KX7lp8eoCCbGEPCyzDnvKIXCUwG0fI37lnYqs +txo/skGqdHr7S1L2ylMCgYEA1WNnUvzvzMXiFUAg3aY8kehmq45tHBi7w9rh3rjZ +E6Koj5skR70m9J7/JWWhBzl3udXpiKx1NvUKAW2qV88jzrxKWnncv7twcy+W2WQF +AhNeSJyV7GMj0spkaNvIL5EWMhU3kiyoQtf2gleDimf/VO9wuLaE9rcLnRfzLhiO +Xg8CgYEAk6QP8i01oPfY4IzE9JTHbVIglllbygxL7CKid6kH2GtbcnT8NU9aj4YY +XdGRiDRw7/O6sHSKbLkSQ1TUAVil8Xw6tLQddbRI+p93RVw9haa8bCBtzDV7eB4m +M5/MrINmIquLaSBd+l3HA36GCxLzBUsT87UZIQnJu7YauFVJYsU= +-----END RSA PRIVATE KEY----- diff --git a/test/files/conf/id_rsa.pub b/test/files/conf/id_rsa.pub new file mode 100644 index 0000000..cdd0869 --- /dev/null +++ b/test/files/conf/id_rsa.pub @@ -0,0 +1 @@ +ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA1PL4EwRZFy1ewBTa4a1TK+mQ4rAupOeZsiqir/su61dAGvC6pEFAa+Litj6ub6NvcBRMAdXeBtbOnQpInE7BFwKVhwU3n60Mc69SjLiozK3Oxh9sfmbJv/JdELRS5aB9x82Y0bO5fZFPFj7SxPNMugQQMEMQHW01wsa5nJR2pYLwCtu7yoD6fQ0TJEsRqWwyQTNoR19yzL6h7p/hq9SqiqCKfsHWK4+Tj0IgF7Nwz8i+BqqOq2kC9lTRuT8HalNbqVVQ6iI+ER7FgdfSZtKKX6R9SOaKQ7p0Dt6JLFibMNhjwt5EKHsgfMOsl1G8SEncDREtTng8/JLlvIhiqmWzwQ== root@d57cdb1934d1 diff --git a/test/files/expose_mapr_hive.sh b/test/files/expose_mapr_hive.sh new file mode 100644 index 0000000..5fbf875 --- /dev/null +++ b/test/files/expose_mapr_hive.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +#Wait for hadoop-master to be available +conn=0 +while [ $conn -ne 1 ] +do + ping -c 4 -q hadoop-master + if [ "$?" -eq 0 ]; then + conn=1 + else + conn=0 + fi +done + +# Wait for hive and warehouse directories in HDFS +hivecliReady=1 +while [ $hivecliReady -ne 0 ] +do + sleep 5s + ssh -o StrictHostKeyChecking=no root@hadoop-master 'hadoop fs -ls /user/hive/warehouse' + hivecliReady=$? 
+done diff --git a/test/image_tests/image_tests.bats.sh b/test/image_tests/image_tests.bats.sh index 4f0af22..aee8bb4 100755 --- a/test/image_tests/image_tests.bats.sh +++ b/test/image_tests/image_tests.bats.sh @@ -1,5 +1,17 @@ #!/usr/bin/env bash +# Prints the beeline user for ${IMAGE}: hive for teradatalabs/mapr-hive, hdfs otherwise. +# Called via $(get_user), so it must print only the user name; the calling test +# has already run skip_if_needed. +get_user() { + if [ "${IMAGE}" == "teradatalabs/mapr-hive" ] + then + echo "hive" + else + echo "hdfs" + fi +} + skip_if_needed() { SHOULD_RUN=true # Can't rely on exit codes here, as BATS will fail the test if any of the statements @@ -22,6 +34,11 @@ assert_output_contains() { printf '%s\n' "${lines[@]}" | grep -q $1 } +function exposes_mapr { + skip_if_needed + assert_run sh /root/expose_mapr_hive.sh +} + function exposes_hive { skip_if_needed assert_run dockerize -wait tcp://hadoop-master:10000 -timeout 90s @@ -29,12 +46,12 @@ function exposes_hive { function allows_creating_a_table_in_hive { skip_if_needed - assert_run beeline -n hdfs -u jdbc:hive2://hadoop-master:10000 -e 'create table test as select 42 id' + assert_run beeline -n "$(get_user)" -u jdbc:hive2://hadoop-master:10000 -e 'create table test as select 42 id' } function allows_selecting_from_the_table { skip_if_needed - assert_run beeline -n hdfs -u jdbc:hive2://hadoop-master:10000 -e 'select * from test' + assert_run beeline -n "$(get_user)" -u jdbc:hive2://hadoop-master:10000 -e 'select * from test' assert_output_contains 'test.id' assert_output_contains '42' }