
本文中直接跳過服務器之間免密碼登錄以及相關賬戶的創建和權限配置的工作。
相關操作請移步:免密碼登錄、Linux 賬戶的添加和配置。
軟件版本
scala
scala-2.10.6.tgz
hadoop
hadoop-2.6.4.tar.gz
hadoop-2.6.4-src.tar.gz(源代碼)
mahout
apache-mahout-distribution-0.12.2.tar.gz
hive
apache-hive-2.1.0-bin.tar.gz
mysql-connector-java-5.1.39.tar.gz(數據庫 JDBC 驅動)
spark
spark-1.6.2-bin-hadoop2.6.tgz(選版本對應的)
spark-1.6.2.tgz(源代碼)
hbase
zookeeper-3.4.8.tar.gz(提前裝好 zookeeper)
hbase-1.2.2-bin.tar.gz
storm
apache-storm-1.0.2.tar.gz
apache-storm-1.0.2-src.tar.gz(源代碼)
集群的三種模式
local(單機)
standalone
yarn
配置清單
JDK
profile
export JAVA_HOME=/usr/local/jdk1.8.0_91
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
scala
profile
# scala
export SCALA_HOME=/usr/local/scala-2.10.6
export PATH=$PATH:$SCALA_HOME/bin
Hadoop
profile
# hadoop
export HADOOP_HOME=/usr/local/hadoop-2.6.4
export HADOOP_PREFIX=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
*-env.sh
$HADOOP_HOME/etc/hadoop/hadoop-env.sh, $HADOOP_HOME/etc/hadoop/mapred-env.sh, $HADOOP_HOME/etc/hadoop/yarn-env.sh
source ~/.bash_profile
# 對于 yarn-env.sh 只需要配置 JAVA_HOME 即可,否則會出現找不到 ResourceManager、NodeManager 類的問題
# Error: Could not find or load main class org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
# Error: Could not find or load main class org.apache.hadoop.yarn.server.nodemanager.NodeManager
slaves
$HADOOP_HOME/etc/hadoop/slaves
slave1
slave2
core-site
$HADOOP_HOME/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
<final>true</final>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>
hdfs-site
$HADOOP_HOME/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.datanode.ipc.address</name>
<value>0.0.0.0:50020</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
mapred-site
$HADOOP_HOME/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
yarn-site
$HADOOP_HOME/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
</configuration>
啟動HDFS
# 初始化 hdfs
$HADOOP_HOME/bin/hdfs namenode -format
# 啟動 hdfs
$HADOOP_HOME/sbin/start-dfs.sh
# 啟動 yarn管理集群
$HADOOP_HOME/sbin/start-yarn.sh
# jobhistory啟動
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
測試
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.4.jar wordcount <INPUT> <OUTPUT>
端口
| 端口號 | 描述 |
|---|---|
| 8088 | All Applications(yarn-web-ui) |
| 9000 | hdfs端口 |
| 50070 | Namenode information(web-ui) |
| 50090 | SecondaryNamenode information(web-ui) |
| 19888 | JobHistory(web-ui) |
pig
profile
# pig
export PIG_HOME=/usr/local/pig-0.16.0
export PIG_CLASSPATH=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$PIG_HOME/bin
spark
profile
# spark
export SPARK_HOME=/usr/local/spark-1.6.2-bin-hadoop2.6
export PATH=$PATH:$SPARK_HOME/bin
spark-env
$SPARK_HOME/conf/spark-env.sh
# local, standalone, yarn模式
source ~/.bash_profile
export SPARK_MASTER_IP=master
# jobhistory
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=7777 -Dspark.history.retainedApplications=2 -Dspark.history.fs.logDirectory=hdfs://master:9000/sparklog"
# 其它配置
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=1G
slaves
$SPARK_HOME/conf/slaves
slave1
slave2
spark-defaults.conf
# jobhistory配置
spark.eventLog.enabled true
spark.eventLog.dir hdfs://master:9000/sparklog
spark.eventLog.compress true
啟動spark
# 啟動計算框架集群
$SPARK_HOME/sbin/start-all.sh
# 啟動HistoryServer
$SPARK_HOME/sbin/start-history-server.sh
測試
# 1
$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME/lib/spark-examples-1.6.2-hadoop2.6.0.jar
# 2
MASTER=local $SPARK_HOME/bin/run-example SparkPi
端口
| 端口號 | 描述 |
|---|---|
| 4040 | App(web-ui) |
| 7077 | Master |
| 7777 | History(web-ui) |
zookeeper
profile
# zookeeper
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.8
export PATH=$PATH:$ZOOKEEPER_HOME/bin
zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
dataDir=/home/zookeeper/data
# dataLogDir=/home/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
myid
創建 myid 文件 /home/zookeeper/data/myid
# 不同的機器需要分配不同的 id,序號與上述 zoo.cfg 中的 server.* 中的數字對應
echo 1 > /home/zookeeper/data/myid
啟動
$ZOOKEEPER_HOME/bin/zkServer.sh start
端口
| 端口號 | 描述 |
|---|---|
| 2181 | Zookeeper-Client |
| 2888 | Follower 與 Leader 之間的通信端口 |
| 3888 | Leader 選舉端口 |
hbase
profile
# hbase
export HBASE_HOME=/usr/local/hbase-1.2.2
export PATH=$PATH:$HBASE_HOME/bin
hbase-env
$HBASE_HOME/conf/hbase-env.sh
source ~/.bash_profile
# export JAVA_HOME HADOOP_HOME HBASE_HOME
export HBASE_CLASSPATH=$HADOOP_HOME/etc/hadoop
export HBASE_MANAGES_ZK=true
export HBASE_LOG_DIR=$HBASE_HOME/logs
regionservers
slave1
slave2
hbase-site.xml
$HBASE_HOME/conf/hbase-site.xml
<configuration>
<property>
<name>hbase.master</name>
<value>master:6000</value>
</property>
<property>
<name>hbase.master.maxclockskew</name>
<value>180000</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master,slave1,slave2</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/zookeeper/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
啟動HBase
$HBASE_HOME/bin/start-hbase.sh
測試
$HBASE_HOME/bin/hbase shell
端口
| 端口號 | 描述 |
|---|---|
| 16010 | HBase(web-ui) |
hive
profile
# hive
export HIVE_HOME=/usr/local/apache-hive-2.1.0-bin
export PATH=$PATH:$HIVE_HOME/bin
hive-env.sh
$HIVE_HOME/conf/hive-env.sh
source ~/.bash_profile
export HIVE_CONF_DIR=$HIVE_HOME/conf
hive-site.xml
$HIVE_HOME/conf/hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>hdfs://master:9000/user/hive/warehouse</value>
</property>
<property>
<name>hive.hwi.listen.host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>hive.hwi.listen.port</name>
<value>9999</value>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-2.1.0.war</value>
</property>
</configuration>
初始化
# Hive 2.x 首次使用前需要先初始化 metastore 的表結構
$HIVE_HOME/bin/schematool -dbType mysql -initSchema
測試
$HIVE_HOME/bin/hive
sqoop
profile
# sqoop
export SQOOP_HOME=/usr/local/sqoop-1.99.6-bin-hadoop200
export PATH=$PATH:$SQOOP_HOME/bin
export CATALINA_BASE=$SQOOP_HOME/server
export LOGDIR=$SQOOP_HOME/logs
測試
# 啟動
$SQOOP_HOME/bin/sqoop2-server start
# cli
$SQOOP_HOME/bin/sqoop2-shell
mahout
storm
第三方包管理工具
maven
下載
配置 path 路徑即可
sbt
Homebrew (Third-party package)
$ brew install sbt
Macports (Third-party package)
$ port install sbt
未完待續...