大數據開發學習平臺安裝配置

入門基礎篇

本文中直接跳過服務器之間免密碼登錄以及相關賬戶的創建和權限配置的工作。
相關操作請移步:免密碼登錄、Linux 賬戶的添加和配置。

軟件版本

jdk
jdk-8u91-linux-x64.tar.gz

scala
scala-2.10.6.tgz

hadoop
hadoop-2.6.4.tar.gz
hadoop-2.6.4-src.tar.gz 源代碼

mahout
apache-mahout-distribution-0.12.2.tar.gz

hive
apache-hive-2.1.0-bin.tar.gz
mysql-connector-java-5.1.39.tar.gz 數據庫JDBC驅動

spark
spark-1.6.2-bin-hadoop2.6.tgz 選擇與 Hadoop 版本對應的預編譯包
spark-1.6.2.tgz 源代碼

hbase
zookeeper-3.4.8.tar.gz 提前裝好zookeeper
hbase-1.2.2-bin.tar.gz

storm
apache-storm-1.0.2.tar.gz
apache-storm-1.0.2-src.tar.gz 源代碼

sqoop
sqoop-1.99.6-bin-hadoop200.tar.gz

集群的三種模式

local(單機)

standalone

yarn


配置清單

JDK

profile

export JAVA_HOME=/usr/local/jdk1.8.0_91
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin

scala

profile

# scala
export SCALA_HOME=/usr/local/scala-2.10.6
export PATH=$PATH:$SCALA_HOME/bin

Hadoop

profile

# hadoop
export HADOOP_HOME=/usr/local/hadoop-2.6.4
export HADOOP_PREFIX=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

*-env.sh

$HADOOP_HOME/etc/hadoop/hadoop-env.sh, $HADOOP_HOME/etc/hadoop/mapred-env.sh, $HADOOP_HOME/etc/hadoop/yarn-env.sh

source ~/.bash_profile

# 對于 yarn-env.sh 只需要配置 JAVA_HOME 即可,否則會出現找不到ResourceManager、NodeManager類的問題
# Error: Could not find or load main class org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
# Error: Could not find or load main class org.apache.hadoop.yarn.server.nodemanager.NodeManager

slaves

$HADOOP_HOME/etc/hadoop/slaves

slave1
slave2

core-site

$HADOOP_HOME/etc/hadoop/core-site.xml

<configuration>
    <property>
            <name>hadoop.tmp.dir</name>
            <value>/home/hadoop/tmp</value>
            <description>A base for other temporary directories.</description>
    </property>

    <property>
            <name>fs.default.name</name>
            <value>hdfs://master:9000</value>
            <final>true</final>
            <description>The name of the default file system.  A URI whose
            scheme and authority determine the FileSystem implementation.  The
            uri's scheme determines the config property (fs.SCHEME.impl) naming
            the FileSystem implementation class.  The uri's authority is used to
            determine the host, port, etc. for a filesystem.</description>
    </property>
</configuration>

hdfs-site

$HADOOP_HOME/etc/hadoop/hdfs-site.xml

<configuration>
  <property>
    <name>dfs.datanode.ipc.address</name>
    <value>0.0.0.0:50020</value>
  </property>
  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>

mapred-site

$HADOOP_HOME/etc/hadoop/mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>

    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
</configuration>

yarn-site

$HADOOP_HOME/etc/hadoop/yarn-site.xml

<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <property>
        <name>yarn.resourcemanager.address</name>
        <value>master:8032</value>
    </property>

    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>master:8030</value>
    </property>

    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>master:8031</value>
    </property>
</configuration>

啟動HDFS

# 初始化 hdfs
$HADOOP_HOME/bin/hdfs namenode -format

# 啟動 hdfs
$HADOOP_HOME/sbin/start-dfs.sh

# 啟動 yarn管理集群
$HADOOP_HOME/sbin/start-yarn.sh

# jobhistory啟動
$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver

測試

$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.4.jar wordcount <INPUT> <OUTPUT>

端口

端口號 描述
8088 All Applications(yarn-web-ui)
9000 hdfs端口
50070 Namenode information(web-ui)
50090 SecondaryNamenode information(web-ui)
19888 JobHistory(web-ui)

pig

profile

# pig
export PIG_HOME=/usr/local/pig-0.16.0
export PIG_CLASSPATH=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$PIG_HOME/bin

spark

profile

# spark
export SPARK_HOME=/usr/local/spark-1.6.2-bin-hadoop2.6
export PATH=$PATH:$SPARK_HOME/bin

spark-env

$SPARK_HOME/conf/spark-env.sh

# local, standalone, yarn模式
source ~/.bash_profile
export SPARK_MASTER_IP=master

# jobhistory
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=7777 -Dspark.history.retainedApplications=2 -Dspark.history.fs.logDirectory=hdfs://master:9000/sparklog"

# 其它配置
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=1G

slaves

$SPARK_HOME/conf/slaves

slave1
slave2

spark-defaults.conf

# jobhistory配置
spark.eventLog.enabled  true
spark.eventLog.dir      hdfs://master:9000/sparklog
spark.eventLog.compress true

啟動spark

# 啟動計算框架集群
$SPARK_HOME/sbin/start-all.sh

# 啟動HistoryServer
$SPARK_HOME/sbin/start-history-server.sh

測試

# 1
$SPARK_HOME/bin/spark-submit --class org.apache.spark.examples.SparkPi $SPARK_HOME/lib/spark-examples-1.6.2-hadoop2.6.0.jar

# 2
MASTER=local $SPARK_HOME/bin/run-example SparkPi

端口

端口號 描述
4040 App(web-ui)
7077 Master
7777 History(web-ui)

zookeeper

profile

# zookeeper
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.8
export PATH=$PATH:$ZOOKEEPER_HOME/bin

zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
dataDir=/home/zookeeper/data
# dataLogDir=/home/zookeeper/logs
# the port at which the clients will connect
clientPort=2181

server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888

myid

創建 myid 文件 /home/zookeeper/data/myid

# 不同的機器需要分配不同的 id,序號與上述 zoo.cfg 中的 server.* 中的數字對應
echo 1 > /home/zookeeper/data/myid

啟動

$ZOOKEEPER_HOME/bin/zkServer.sh start

端口

端口號 描述
2181 Zookeeper-Client
2888 Follower 與 Leader 之間的通信端口
3888 Leader 選舉端口

hbase

profile

# hbase
export HBASE_HOME=/usr/local/hbase-1.2.2
export PATH=$PATH:$HBASE_HOME/bin

hbase-env

$HBASE_HOME/conf/hbase-env.sh

source ~/.bash_profile
# export JAVA_HOME HADOOP_HOME HBASE_HOME
export HBASE_CLASSPATH=$HADOOP_HOME/etc/hadoop
export HBASE_MANAGES_ZK=true
export HBASE_LOG_DIR=$HBASE_HOME/logs

regionservers

slave1
slave2

hbase-site.xml

$HBASE_HOME/conf/hbase-site.xml

<configuration>
  <property>
    <name>hbase.master</name>
    <value>master:6000</value>
  </property>
  <property>
    <name>hbase.master.maxclockskew</name>
    <value>180000</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://master:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1,slave2</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/home/zookeeper/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>

啟動HBase

$HBASE_HOME/bin/start-hbase.sh

測試

$HBASE_HOME/bin/hbase shell

端口

端口號 描述
16010 HBase(web-ui)

hive

profile

# hive
export HIVE_HOME=/usr/local/apache-hive-2.1.0-bin
export PATH=$PATH:$HIVE_HOME/bin

hive-env.sh

$HIVE_HOME/conf/hive-env.sh

source ~/.bash_profile
export HIVE_CONF_DIR=$HIVE_HOME/conf

hive-site.xml

$HIVE_HOME/conf/hive-site.xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    <description>password to use against metastore database</description>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://master:9000/user/hive/warehouse</value>
  </property>
    <property>
        <name>hive.hwi.listen.host</name>
        <value>0.0.0.0</value>
    </property>
    <property>
        <name>hive.hwi.listen.port</name>
        <value>9999</value>
    </property>
    <property>
        <name>hive.hwi.war.file</name>
        <value>lib/hive-hwi-2.1.0.war</value>
    </property>
</configuration>

初始化

# Hive 2.x 首次使用前需要先初始化 metastore 的數據庫表結構
$HIVE_HOME/bin/schematool -dbType mysql -initSchema

測試

$HIVE_HOME/bin/hive

sqoop

profile

# sqoop
export SQOOP_HOME=/usr/local/sqoop-1.99.6-bin-hadoop200
export PATH=$PATH:$SQOOP_HOME/bin
export CATALINA_BASE=$SQOOP_HOME/server
export LOGDIR=$SQOOP_HOME/logs

測試

# 啟動
$SQOOP_HOME/bin/sqoop2-server start

# cli
$SQOOP_HOME/bin/sqoop2-shell

mahout

storm

第三方包管理工具

maven

下載
配置 path 路徑即可

sbt

Homebrew (Third-party package)

$ brew install sbt

Macports (Third-party package)

$ port install sbt

下載

未完待續...

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關閱讀更多精彩內容

友情鏈接更多精彩內容