diff --git a/roles/common/tasks/main.yml b/roles/common/tasks/main.yml
index 632fa82..f5dade5 100644
--- a/roles/common/tasks/main.yml
+++ b/roles/common/tasks/main.yml
@@ -1,3 +1,8 @@
+#- name: Correct /etc/hosts file
+#  lineinfile:
+#    dest: /etc/hosts
+#    line: "{{ansible_default_ipv4.address}} {{inventory_hostname}} {{ansible_hostname}}"
+
 - name: Check DNS resolver
   lineinfile: dest=/etc/resolv.conf regexp='^nameserver' line='nameserver {{ dns_ip }}'
 
@@ -8,7 +13,18 @@
     state: absent
 
+- name: List repository definitions to remove
+  find:
+    paths: /etc/yum.repos.d
+  register: yum_repo_files
+  when: is_offline == True
+
 - name: Physically clean repositories
-  file: path=/etc/yum.repos.d state=absent
+  # The file module does not expand shell globs, so every entry found above is
+  # removed one by one; the /etc/yum.repos.d directory itself is kept.
+  file:
+    path: "{{ item.path }}"
+    state: absent
+  with_items: "{{ yum_repo_files.files | default([]) }}"
   when: is_offline == True
 
 - name: Initiate repositories
diff --git a/roles/mesos/vars/main.yml b/roles/mesos/vars/main.yml
index 11c1177..dd839f8 100644
--- a/roles/mesos/vars/main.yml
+++ b/roles/mesos/vars/main.yml
@@ -3,7 +3,7 @@ mesos_version: 1.1.0
 
 # Download vars
 mesos_download_dir: /opt/src
-mesos_url: {{mirror_base_url}}/mesos
+mesos_url: "{{mirror_base_url}}/mesos"
 mesos_archive: mesos-{{mesos_version}}.tar.gz
 mesos_archive_url: "{{mesos_url}}/{{mesos_version}}/{{mesos_archive}}"
 mesos_md5: FF89748A6668425D462DCAA12608CF8A
diff --git a/roles/zeppelin/tasks/main.yml b/roles/zeppelin/tasks/main.yml
new file mode 100644
index 0000000..2bf8734
--- /dev/null
+++ b/roles/zeppelin/tasks/main.yml
@@ -0,0 +1,69 @@
+---
+# Installs Apache Zeppelin from the mirror archive and drops its systemd unit file.
+- name: Ensure download directory exists
+  file:
+    path: "{{zep_download_dir}}"
+    state: directory
+    recurse: true
+
+- name: Download binary
+  get_url:
+    url: "{{zep_download_url}}"
+    dest: "{{zep_download_dir}}/{{zep_archive_file}}"
+
+# NOTE(review): the hadoop group is not created in this role; presumably
+# another role provides it before this one runs - confirm.
+- name: create zeppelin user to manage daemon
+  user:
+    name: zeppelin
+    groups: hadoop
+    append: true
+    system: true
+    home: "{{zep_workdir}}"
+
+# The archive is already on the target host (copy: false); "creates" skips the
+# extraction once the versioned directory exists.
+- name: Extract zeppelin
+  unarchive:
+    src: "{{zep_download_dir}}/{{zep_archive_file}}"
+    dest: "{{zep_lib_dir}}"
+    owner: zeppelin
+    group: zeppelin
+    copy: false
+    creates: "{{zep_target}}"
+
+- name: Link generic zeppelin dir to specific version directory
+  file:
+    path: "{{zep_home}}"
+    src: "{{zep_target}}"
+    state: link
+
+# zep_home is already absolute, so it is not prefixed with another "/".
+- name: add zeppelin binaries to path
+  lineinfile:
+    dest: /etc/profile
+    line: "export PATH={{zep_home}}/bin:$PATH"
+
+- name: ensure log & run directories exists
+  file:
+    path: "{{item}}"
+    state: directory
+    owner: zeppelin
+    group: zeppelin
+    mode: "0744"
+  with_items:
+    - "{{zep_log_dir}}"
+    - "{{zep_run_dir}}"
+
+- name: add configuration file
+  template:
+    src: zeppelin-env.sh.j2
+    dest: "{{zep_conf_dir}}/zeppelin-env.sh"
+    owner: root
+    group: root
+    mode: "0755"
+
+- name: add zeppelin service
+  template:
+    src: zeppelin.service.j2
+    dest: /etc/systemd/system/zeppelin.service
diff --git a/roles/zeppelin/templates/zeppelin-env.sh.j2 b/roles/zeppelin/templates/zeppelin-env.sh.j2
new file mode 100644
index 0000000..e778754
--- /dev/null
+++ b/roles/zeppelin/templates/zeppelin-env.sh.j2
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# export JAVA_HOME=
+# export MASTER=  # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
+# export ZEPPELIN_JAVA_OPTS  # Additional jvm options. For example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
+# export ZEPPELIN_MEM  # Zeppelin jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
+# export ZEPPELIN_INTP_MEM  # Zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
+# export ZEPPELIN_INTP_JAVA_OPTS  # Zeppelin interpreter process jvm options.
+# export ZEPPELIN_SSL_PORT  # ssl port (used when ssl environment variable is set to true)
+
+export ZEPPELIN_LOG_DIR="{{zep_log_dir}}"  # Where log files are stored. PWD by default.
+export ZEPPELIN_PID_DIR="{{zep_run_dir}}"  # Where pid files are stored. ${ZEPPELIN_HOME}/run by default.
+# export ZEPPELIN_WAR_TEMPDIR  # The location of jetty temporary directory.
+# export ZEPPELIN_NOTEBOOK_DIR  # Where notebooks are saved
+# export ZEPPELIN_NOTEBOOK_HOMESCREEN  # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
+# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE  # Hide homescreen notebook from list when this value is set to "true". Default "false"
+# export ZEPPELIN_NOTEBOOK_S3_BUCKET  # Bucket where notebooks are saved
+# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT  # Endpoint of the bucket
+# export ZEPPELIN_NOTEBOOK_S3_USER  # User in bucket where notebooks are saved. For example bucket/user/notebook/2A94M5J1Z/note.json
+# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID  # AWS KMS key ID
+# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION  # AWS KMS key region
+# export ZEPPELIN_IDENT_STRING  # A string representing this instance of zeppelin. $USER by default.
+# export ZEPPELIN_NICENESS  # The scheduling priority for daemons. Defaults to 0.
+# export ZEPPELIN_INTERPRETER_LOCALREPO  # Local repository for interpreter's additional dependency loading
+# export ZEPPELIN_INTERPRETER_DEP_MVNREPO  # Remote principal repository for interpreter's additional dependency loading
+# export ZEPPELIN_HELIUM_NPM_REGISTRY  # Remote Npm registry for Helium dependency loader
+# export ZEPPELIN_NOTEBOOK_STORAGE  # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
+# export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC  # If there are multiple notebook storages, should we treat the first one as the only source of truth?
+# export ZEPPELIN_NOTEBOOK_PUBLIC  # Make notebook public by default when created, private otherwise
+
+#### Spark interpreter configuration ####
+
+## Use provided spark installation ##
+## Defining SPARK_HOME makes Zeppelin run the spark interpreter process using spark-submit
+##
+export SPARK_HOME="{{spark_home}}"  # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
+# export SPARK_SUBMIT_OPTIONS  # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
+# export SPARK_APP_NAME  # (optional) The name of spark application.
+
+## Use embedded spark binaries ##
+## Without SPARK_HOME defined, Zeppelin is still able to run the spark interpreter process using embedded spark binaries.
+## However, it is not encouraged when you can define SPARK_HOME
+##
+# Options read in YARN client mode
+# export HADOOP_CONF_DIR  # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
+# Pyspark (supported with Spark 1.2.1 and above)
+# To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
+# export PYSPARK_PYTHON  # path to the python command. Must be the same path on the driver (Zeppelin) and all workers.
+# export PYTHONPATH
+
+## Spark interpreter options ##
+##
+# export ZEPPELIN_SPARK_USEHIVECONTEXT  # Use HiveContext instead of SQLContext if set true. true by default.
+# export ZEPPELIN_SPARK_CONCURRENTSQL  # Execute multiple SQL concurrently if set true. false by default.
+# export ZEPPELIN_SPARK_IMPORTIMPLICIT  # Import implicits, UDF collection, and sql if set true. true by default.
+# export ZEPPELIN_SPARK_MAXRESULT  # Max number of Spark SQL result to display. 1000 by default.
+# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE  # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
+
+
+#### HBase interpreter configuration ####
+
+## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
+
+# export HBASE_HOME=  # (required) Directory under which the HBase scripts and configuration are located
+# export HBASE_CONF_DIR=  # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
+
+#### ZeppelinHub connection configuration ####
+# export ZEPPELINHUB_API_ADDRESS  # Refers to the address of the ZeppelinHub service in use
+# export ZEPPELINHUB_API_TOKEN  # Refers to the Zeppelin instance token of the user
+# export ZEPPELINHUB_USER_KEY  # Optional, when using Zeppelin with authentication.
+
+#### Zeppelin impersonation configuration
+# export ZEPPELIN_IMPERSONATE_CMD  # Optional, when user wants to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
+# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER  # Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled
diff --git a/roles/zeppelin/templates/zeppelin.service.j2 b/roles/zeppelin/templates/zeppelin.service.j2
new file mode 100644
index 0000000..423f57d
--- /dev/null
+++ b/roles/zeppelin/templates/zeppelin.service.j2
@@ -0,0 +1,17 @@
+[Unit]
+Description=Apache Zeppelin service
+Documentation=https://zeppelin.apache.org
+
+[Service]
+Type=forking
+User=zeppelin
+Group=zeppelin
+WorkingDirectory={{zep_home}}
+ExecStart={{zep_home}}/bin/zeppelin-daemon.sh start
+# systemd has no ExecRestart= directive; "systemctl restart" runs ExecStop=
+# followed by ExecStart=.
+ExecReload={{zep_home}}/bin/zeppelin-daemon.sh reload
+ExecStop={{zep_home}}/bin/zeppelin-daemon.sh stop
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/zeppelin/vars/main.yml b/roles/zeppelin/vars/main.yml
new file mode 100644
index 0000000..5e86e19
--- /dev/null
+++ b/roles/zeppelin/vars/main.yml
@@ -0,0 +1,21 @@
+---
+# Download conf
+zep_version: "0.7.2"
+zep_download_dir: /opt/src
+zep_url: "{{mirror_base_url}}/zeppelin"
+zep_version_name: "zeppelin-{{zep_version}}-bin-all"
+zep_archive_file: "{{zep_version_name}}.tgz"
+zep_download_url: "{{zep_url}}/zeppelin-{{zep_version}}/{{zep_archive_file}}"
+
+# Install conf
+zep_lib_dir: "/usr/local/lib"
+zep_target: "{{zep_lib_dir}}/{{zep_version_name}}"
+zep_home: "{{zep_lib_dir}}/zeppelin"
+zep_conf_dir: "{{zep_home}}/conf"
+zep_bin_dir: "/usr/local/bin"
+zep_workdir: "/var/lib/zep"
+
+# zeppelin-env.sh conf (values rendered into templates/zeppelin-env.sh.j2)
+spark_home: /usr/local/lib/spark
+zep_log_dir: /var/log/zeppelin
+zep_run_dir: /var/run/zeppelin
diff --git a/roles/zookeeper/vars/main.yml b/roles/zookeeper/vars/main.yml
index 71404c5..9510471 100644
--- a/roles/zookeeper/vars/main.yml
+++ b/roles/zookeeper/vars/main.yml
@@ -1,7 +1,7 @@
 ---
 zk_version: 3.4.9
 zk_download_dir: /opt/src
-zk_url: "{{mirror_base_url}}/zookeeper/zookeeper-{{zk_version}}"
+zk_url: "{{mirror_base_url}}/zookeeper/zookeeper-{{zk_version}}"
 zk_archive: zookeeper-{{zk_version}}.tar.gz
 zk_archive_url: "{{zk_url}}/{{zk_archive}}"
 zk_md5: 3e8506075212c2d41030d874fcc9dcd2