Hadoop+Hive+Mysql+Zookeeper+Kafka+Scala+Spark的大數據環境搭建

一、前期準備

1、服務器

三臺 centos7+ 服務器

10.1.80.128   #主節點
10.1.80.129
10.1.80.130

三臺均修改hosts

vim /etc/hosts

#增加
10.1.80.128   master
10.1.80.129   slave1
10.1.80.130   slave2

三臺均關閉防火墻

systemctl status firewalld.service
systemctl stop firewalld.service
systemctl disable firewalld.service

2、三臺服務器互聯免密

#以root用戶登錄,更改ssh配置文件
vim /etc/ssh/sshd_config
#啟用rsa認證
RSAAuthentication yes
#啟用公鑰私鑰配對認證方式
PubkeyAuthentication yes
#公鑰文件路徑
AuthorizedKeysFile .ssh/authorized_keys
#系統在/root/.ssh下生成id_rsa、id_rsa.pub,一路按回車即可
ssh-keygen -t rsa
#把A機下的id_rsa.pub發送到B、C機(即slave1、slave2)
ssh-copy-id -i /root/.ssh/id_rsa.pub slave1
ssh-copy-id -i /root/.ssh/id_rsa.pub slave2
#驗證
#登錄B機
ssh slave1
#檢查是否成功登錄
ifconfig
#退出ssh登錄,返回本機
exit

3、安裝jdk

下載地址:https://www.oracle.com/java/technologies/javase-downloads.html

# 將java安裝包移動到相應的路徑
mv jdk-8u201-linux-x64.tar.gz /usr/local

# 進入 /usr/local 目錄并解壓
cd /usr/local
tar -zxvf jdk-8u201-linux-x64.tar.gz

# 重命名文件夾為java
mv jdk1.8.0_201 java

# 添加java環境到配置/etc/profile文件  vim /etc/profile
export JAVA_HOME=/usr/local/java
export JRE_HOME=/usr/local/java/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin

#激活環境變量
source /etc/profile

二、hadoop

1、下載 :

https://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.1/

2、安裝

安裝

#移動至 /usr/local
mv hadoop-3.2.1.tar.gz   /usr/local/
#進入 /usr/local 目錄
cd /usr/local
#解壓
tar -zxvf hadoop-3.2.1.tar.gz
#重命名
mv hadoop-3.2.1  hadoop
#刪除壓縮包
rm -rf hadoop-3.2.1.tar.gz

配置環境變量

vim /etc/profile

#增加
#hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

#激活
source /etc/profile

#成功可查看版本
hadoop version

3、修改hadoop相關的配置文件

#創建data目錄 /usr/local/hadoop/data
cd /usr/local/hadoop
mkdir  data

hadoop-env.sh與yarn-env.sh文件配置

cd /usr/local/hadoop/etc/hadoop

# hadoop-env.sh 文件引入JAVA_HOME環境變量及root用戶
vim hadoop-env.sh
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export JAVA_HOME=/usr/local/java

# yarn-env.sh 文件引入JAVA_HOME環境變量
vim yarn-env.sh

export JAVA_HOME=/usr/local/java

core-site.xml文件配置

vim core-site.xml

#設置hdfs集群對外提供的目錄
#以及hadoop的公共目錄
<configuration>

<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>

<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/data</value>
</property>

</configuration>

hdfs-site.xml文件配置

vim hdfs-site.xml

#設置hdfs的一些目錄,放在/usr/local/hadoop/data的目錄下面
#設置副本數為2(與下面dfs.replication的取值以及兩個DataNode的數量一致)


<property>
  <name>dfs.name.dir</name>
  <value>/usr/local/hadoop/data/namenode</value>
</property>
<property>
  <name>dfs.data.dir</name>
  <value>/usr/local/hadoop/data/datanode</value>
</property>
<property>
  <name>dfs.tmp.dir</name>
  <value>/usr/local/hadoop/data/tmp</value>
</property>
<property>
  <name>dfs.replication</name>  
  <value>2</value>  
</property>

mapred-site.xml文件配置

vim mapred-site.xml
#指定MapReduce程序應該放在哪個資源調度集群上運行。
#若不指定為yarn,那么MapReduce程序就只會在本地運行而非在整個集群中運行。
#高版本不再有jobtracker和tasktracker配置了

<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>

yarn-site.xml文件配置

vim yarn-site.xml

#指定yarn集群中的主節點(ResourceManager所在主機)
#配置yarn集群中的從節點(NodeManager),指定map產生的中間結果傳遞給reduce采用的機制是shuffle

<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

workers文件配置主從關系

vim workers

slave1
slave2

4、啟動及驗證

#格式化HDFS文件系統,只需一次
hadoop namenode -format

#sbin目錄下
start-all.sh

啟動后可能出現的錯誤:
1、root身份啟動導致的錯誤

ERROR: Attempting to operate on yarn nodemanager as root
ERROR: but there is no YARN_NODEMANAGER_USER defined. Aborting operation.
Stopping resourcemanager
ERROR: Attempting to operate on yarn resourcemanager as root
ERROR: but there is no YARN_RESOURCEMANAGER_USER defined. Aborting operation.

修改方法如下

#/usr/local/hadoop/sbin 目錄下
#start-dfs.sh 和 stop-dfs.sh文件頂部
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root

#start-yarn.sh,stop-yarn.sh文件頂部
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root

2、slave的DataNode沒啟動

#多次格式化之后導致節點clusterID不一致
#修改分節點的clusterID和主節點一致

#主節點的在namenode下
vim /usr/local/hadoop/data/namenode/current/VERSION

#分節點的在datanode下
vim /usr/local/hadoop/data/datanode/current/VERSION

同理還會出現storageID一樣導致的slave只能啟動一個的問題。
暴力處理方案:刪除current文件夾重啟。

驗證是否成功

#方法一
jps

主節點
18800 SecondaryNameNode
19043 ResourceManager
19395 Jps
18533 NameNode

從節點
5177 NodeManager
5067 DataNode
5307 Jps
#方法二
hdfs dfsadmin -report
信息如下

Live datanodes (2):

Name: 10.1.80.129:9866 (slave1)
Hostname: slave1
Decommission Status : Normal
Configured Capacity: 18238930944 (16.99 GB)
DFS Used: 8192 (8 KB)
Non DFS Used: 2830036992 (2.64 GB)
DFS Remaining: 15408885760 (14.35 GB)
DFS Used%: 0.00%
DFS Remaining%: 84.48%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 26 16:26:06 CST 2020
Last Block Report: Thu Mar 26 16:20:51 CST 2020
Num of Blocks: 0


Name: 10.1.80.130:9866 (slave2)
Hostname: slave2
Decommission Status : Normal
Configured Capacity: 18238930944 (16.99 GB)
DFS Used: 8192 (8 KB)
Non DFS Used: 2835111936 (2.64 GB)
DFS Remaining: 15403810816 (14.35 GB)
DFS Used%: 0.00%
DFS Remaining%: 84.46%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 26 16:26:06 CST 2020
Last Block Report: Thu Mar 26 16:20:51 CST 2020
Num of Blocks: 0

web端查看yarn集群
http://10.1.80.128:8088/cluster

yarn集群.png

用web查看hdfs
http://10.1.80.128:9870/dfshealth.html#tab-overview

hdfs.png

三、Mysql

1、安裝

#解壓、重命名、放到/usr/local/
tar -zxvf mysql-5.7.29-linux-glibc2.12-x86_64.tar.gz
mv mysql-5.7.29-linux-glibc2.12-x86_64  mysql
mv mysql  /usr/local/

#添加專門的mysql用戶和用戶組來管理mysql,提高安全性
groupadd mysql
useradd -r -g mysql mysql
chown -R mysql:mysql ./

2、配置

mysql配置

#配置mysql服務
cd /usr/local/mysql
mkdir data

cp /usr/local/mysql/support-files/mysql.server /etc/init.d/mysql
vim  /etc/init.d/mysql
#配置如下
basedir=/usr/local/mysql
datadir=/usr/local/mysql/data

cd  /etc
vim my.cnf
#覆蓋配置如下
[client]
port = 3306
default-character-set=utf8

[mysqld]
# 一般配置選項
basedir = /usr/local/mysql
datadir = /usr/local/mysql/data
port = 3306
character-set-server=utf8
default_storage_engine = InnoDB
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
symbolic-links=0


#[mysqld_safe]
#log-error=/var/log/mariadb/mariadb.log
#pid-file=/var/run/mariadb/mariadb.pid

#
# include all files from the config directory
#
!includedir /etc/my.cnf.d

初始化數據庫

cd /usr/local/mysql/bin/
./mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data

#打印如下,最后一行末尾為root@localhost的臨時密碼
2020-03-30T05:45:00.308460Z 0 [Warning] TIMESTAMP with implicit DEFAULT value is deprecated. Please use --explicit_defaults_for_timestamp server option (see documentation for more details).
2020-03-30T05:45:02.054695Z 0 [Warning] InnoDB: New log files created, LSN=45790
2020-03-30T05:45:02.136140Z 0 [Warning] InnoDB: Creating foreign key constraint system tables.
2020-03-30T05:45:02.312420Z 0 [Warning] No existing UUID has been found, so we assume that this is the first time that this server has been started. Generating a new UUID: 9995ae68-7249-11ea-a395-000c29384786.
2020-03-30T05:45:02.314254Z 0 [Warning] Gtid table is not ready to be used. Table 'mysql.gtid_executed' cannot be opened.
2020-03-30T05:45:03.040905Z 0 [Warning] CA certificate ca.pem is self signed.
2020-03-30T05:45:03.456573Z 1 [Note] A temporary password is generated for root@localhost: Heifb*Qeo2#e

#啟動
service mysql start

登錄及遠程配置

cd  /usr/local/mysql/bin/
./mysql -uroot -p

#輸入臨時密碼后修改密碼
set password=password('123456');

#設置遠程訪問
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456';
flush privileges;

開機自動啟動

chkconfig --list
chkconfig --add mysql
chkconfig mysql on

環境變量

vim /etc/profile

# mysql 
export MYSQL_HOME=/usr/local/mysql
export PATH=$PATH:$MYSQL_HOME/bin

source /etc/profile

四、Hive

1、下載

https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.2/
注意版本: hive-2.x與hadoop-2.x匹配,hive-3.x與hadoop-3.x匹配。

2、安裝與環境變量配置

安裝

#移動至/usr/local/目錄
mv apache-hive-3.1.2-bin.tar.gz  /usr/local/

#進入/usr/local/目錄并解壓
cd  /usr/local/
tar -zxvf apache-hive-3.1.2-bin.tar.gz

#重命名并刪除壓縮包
mv apache-hive-3.1.2-bin  hive
rm -rf apache-hive-3.1.2-bin.tar.gz

環境變量

vim /etc/profile

#hive
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export CLASSPATH=$CLASSPATH:${HIVE_HOME}/lib
export PATH=$PATH:${HIVE_HOME}/bin

source /etc/profile

hive --version

3、hive配置

創建HDFS文件夾

hadoop fs -mkdir -p /user/hive/warehouse   # 創建文件夾
hadoop fs -mkdir -p /tmp/hive    # 創建文件夾
hadoop fs -chmod -R 777 /user/hive/warehouse   # 授予權限
hadoop fs -chmod -R 777 /tmp/hive   # 授予權限

hadoop fs -ls /

創建temp

#/usr/local/hive 目錄下
mkdir temp
chmod -R 777 temp

hive-site.xml文件配置

cd  /usr/local/hive/conf

#復制一份配置文件,也可生成空文件自行配置
cp   hive-default.xml.template   hive-site.xml

#增加配置
vim hive-site.xml

#hive配置
 <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/usr/local/hive/root</value>
    <description>Local scratch space for Hive jobs</description>
  </property>
<property>
    <name>hive.downloaded.resources.dir</name>
    <value>/usr/local/hive/${hive.session.id}_resources</value>
    <description>Temporary local directory for added resources in the remote file system.</description>
  </property>
 <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/usr/local/hive/root/operation_logs</value>
    <description>Top level directory where operation logs are stored if logging functionality is enabled</description>
  </property> 
<property>
    <name>hive.querylog.location</name>
    <value>/usr/local/hive/root</value>
    <description>Location of Hive run time structured log file</description>
  </property>

#數據庫配置
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
    <description>
      JDBC connect string for a JDBC metastore.
      To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
      For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
    </description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
    <description>Username to use against metastore database</description>
  </property>
<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
    <description>password to use against metastore database</description>
  </property>
   <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
    <description>
      Enforce metastore schema version consistency.
      True: Verify that version information stored in is compatible with one from Hive jars.  Also disable automatic
            schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures
            proper metastore schema migration. (Default)
      False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
    </description>
  </property>  

配置hive-log4j2.properties

cp hive-log4j2.properties.template hive-log4j2.properties  

vim hive-log4j2.properties

# 修改內容
property.hive.log.dir = /usr/local/hive/temp/root

配置hive-env.sh

cp hive-env.sh.template hive-env.sh
vim hive-env.sh

#前四項可以不寫
export JAVA_HOME=/usr/local/java
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf

export HIVE_AUX_JARS_PATH=${HIVE_HOME}/lib

4、連接數據庫配置

下載:https://dev.mysql.com/downloads/connector/j/5.1.html

tar -zxvf mysql-connector-java-5.1.48.tar.gz 

#將該jar包拷貝至 /usr/local/hive/lib
cp -r mysql-connector-java-5.1.48.jar  /usr/local/hive/lib
cp -r mysql-connector-java-5.1.48-bin.jar  /usr/local/hive/lib

5、初始化并啟動hive

cd /usr/local/hive/bin
schematool -dbType mysql -initSchema

可能出現的錯誤:
1、hadoop和hive的guava版本不一致導致的

SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V
    at org.apache.hadoop.conf.Configuration.set(Configuration.java:1357)
    at org.apache.hadoop.conf.Configuration.set(Configuration.java:1338)
    at org.apache.hadoop.mapred.JobConf.setJar(JobConf.java:536)
    at org.apache.hadoop.mapred.JobConf.setJarByClass(JobConf.java:554)
    at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:448)
    at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:5141)
    at org.apache.hadoop.hive.conf.HiveConf.<init>(HiveConf.java:5104)
    at org.apache.hive.beeline.HiveSchemaTool.<init>(HiveSchemaTool.java:96)
    at org.apache.hive.beeline.HiveSchemaTool.main(HiveSchemaTool.java:1473)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:323)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:236)

解決方法:選擇高版本的guava

#將 /usr/local/hadoop/share/hadoop/common/lib中的guava-27.0-jre.jar拷貝至/usr/local/hive/lib

cp /usr/local/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /usr/local/hive/lib/

#再次
schematool -dbType mysql -initSchema

2、hive-site.xml文件的3215行會有個特殊字符報錯,刪除該字符即可(行數根據版本不同不一定,報錯信息會指出位置)
成功啟動界面如下

Initialization script completed
Mon Mar 30 14:32:36 CST 2020 WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.
schemaTool completed

6、進入hive終端查看并使用一些常見指令

#進入終端
hive

#查看函數
show functions;

#查看函數常見信息
desc function sum;

#新建數據庫
create database learn;

#使用當前數據庫
use learn;

#創建一個數據表
create table student(id int,name string) row format delimited fields terminated by '\t';

#/home/admin 下新建student.txt文件寫入數據(字段之間用Tab分隔,與建表語句中的 '\t' 對應)
001 zhangsan
002 lisi
003 wangwu
004 zhaoliu
005 chenqi

#加載數據 (注意表名前要帶上數據庫名,即 learn.student)
load data local inpath '/home/admin/student.txt' into table learn.student;

#查看
select * from student;

#mysql中查看表信息
SELECT * FROM hive.TBLS;

hadoop的hdfs系統的web端也可以查看


hdfs查看.png

五、scala+zookeeper+kafka集群

下載地址:注意kafka和scala的版本要對應

scala:https://www.scala-lang.org/download/2.12.11.html
zookeeper: http://mirror.bit.edu.cn/apache/zookeeper/
kafka: http://kafka.apache.org/downloads

1、Scala

tar -zxvf scala-2.12.11.tgz

mv scala-2.12.11 scala

mv scala /usr/local/

vim /etc/profile
#增加配置
#scala 
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$SCALA_HOME/bin

source /etc/profile

scala -version

2、Zookeeper

安裝
tar -zxvf zookeeper-3.4.14.tar.gz
mv zookeeper-3.4.14  zookeeper
mv zookeeper /usr/local/

cd /usr/local/zookeeper
mkdir data
mkdir logs
mkdir log
cd /usr/local/zookeeper/conf/

#進行配置
cp zoo_sample.cfg  zoo.cfg
vim zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just 
# example sakes.
dataDir=/usr/local/zookeeper/data
dataLogDir=/usr/local/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
server.0=master:2888:3888   
server.1=slave1:2888:3888
server.2=slave2:2888:3888


#配置myid:master 寫0 、slave1 寫1 、slave2 寫2,且與上面配置的server.N對應(下面以slave1為例)
cd /usr/local/zookeeper/data
echo 1 >myid

開機啟動

cd  /etc/rc.d/init.d
touch  zookeeper
vim zookeeper

#配置如下
#!/bin/bash
#chkconfig: 2345 10 90
#description: service zookeeper
export JAVA_HOME=/usr/local/java
export ZOO_LOG_DIR=/usr/local/zookeeper/log
ZOOKEEPER_HOME=/usr/local/zookeeper
su root ${ZOOKEEPER_HOME}/bin/zkServer.sh "$1"

chkconfig --add zookeeper
chkconfig --list

啟動

cd /usr/local/zookeeper/bin/
./zkServer.sh start
./zkServer.sh status
jps

3、Kafka

#安裝
tar -zxvf kafka_2.12-2.2.0.tgz
mv kafka_2.12-2.2.0 kafka
mv kafka /usr/local
cd /usr/local/kafka/
mkdir log_data

#配置
vim /usr/local/kafka/config/server.properties

#主要改這四條就可以
#broker.id 最好對應zookeeper的命名,好記一點(每臺必須唯一)
#listeners監聽本機(各節點填寫自己的主機名)
#zookeeper.connect三臺全要
#日志數據文件夾要先創建

broker.id=0
listeners=PLAINTEXT://master:9092
zookeeper.connect=master:2181,slave1:2181,slave2:2181
log.dirs=/usr/local/kafka/log_data

依次啟動

/usr/local/kafka/bin/kafka-server-start.sh /usr/local/kafka/config/server.properties
# 后臺啟動
/usr/local/kafka/bin/kafka-server-start.sh -daemon /usr/local/kafka/config/server.properties

測試

zk創建topic

./kafka-topics.sh --create --zookeeper 10.1.80.128:2181 --replication-factor 3 --partitions 1 --topic luojialei-topic

zk查看topic
./kafka-topics.sh --describe --zookeeper 10.1.80.128:2181 --topic luojialei-topic

一個生產者
./kafka-console-producer.sh --broker-list 10.1.80.128:9092,10.1.80.129:9092,10.1.80.130:9092 --topic luojialei-topic

三個消費者
./kafka-console-consumer.sh --bootstrap-server 10.1.80.128:9092,10.1.80.129:9092,10.1.80.130:9092 --from-beginning --topic luojialei-topic

以上任一生產者生產數據,任一消費者都能消費到

安裝監控,選擇KafkaOffsetMonitor:程序以一個jar包的形式運行,部署較為方便。只有監控功能,使用起來也較為安全。

下載:https://github.com/quantifind/KafkaOffsetMonitor

#隨便找一臺裝一次就好
cd /usr/local/kafka

#jar包放入以下目錄
mkdir kafkaoffsetmonitor

啟動腳本

vim start.sh

java -cp KafkaOffsetMonitor-assembly-0.2.0.jar \
    com.quantifind.kafka.offsetapp.OffsetGetterWeb \
    --zk master:2181,slave1:2181,slave2:2181 \
    --port 8788 \
    --refresh 10.seconds \
    --retain 2.days &

./start.sh

web端口查看:http://10.1.80.128:8788/

KafkaMonitor.png

六、Spark

1、下載

https://www.apache.org/dyn/closer.lua/spark/spark-3.0.0-preview2/spark-3.0.0-preview2-bin-hadoop3.2.tgz

2、安裝

tar -zxvf spark-3.0.0-preview2-bin-hadoop3.2.tgz
mv spark-3.0.0-preview2-bin-hadoop3.2  spark
mv  spark  /usr/local

vim /etc/profile
#增加
#spark
export HADOOP_HOME=/usr/local/hadoop   #hadoop配過可以不要
export SPARK_HOME=/usr/local/spark
export PATH="${HADOOP_HOME}/bin:${SCALA_HOME}/bin:${SPARK_HOME}/bin:$PATH"

source /etc/profile

#查看版本
spark-shell


Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 3.0.0-preview2
      /_/
         
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_171)

3、配置

cd /usr/local/spark/conf
cp spark-env.sh.template spark-env.sh
vim spark-env.sh

#修改如下
export JAVA_HOME=/usr/local/java
export SCALA_HOME=/usr/local/scala
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=master
export SPARK_LOCAL_DIRS=/usr/local/spark
#內存
export SPARK_DRIVER_MEMORY=2g
#cpus核心數
export SPARK_WORKER_CORES=2  

cp slaves.template slaves
vim slaves

#修改如下
master
slave1
slave2

4、啟動

以上操作在三臺服務器配置后

cd /usr/local/spark/sbin
#啟動集群
./start-all.sh
#需要停止集群時再執行
./stop-all.sh

#查看,主節點有Master進程,從節點有Worker進程即可
jps

web端口查看:http://10.1.80.128:8080/


spark.png

5、spark-shell的簡單使用

#創建集合,查看首個元素
val data= sc.parallelize(Array(1,2,3))
data.first()

#引入本地文件,查看首個元素
val textFile = sc.textFile("file:/home/admin/text.txt")
textFile.first()

#本地當前目錄下的文件
val distFile1 = sc.textFile("data.txt") 
#HDFS文件
val distFile2 = sc.textFile("hdfs://ip:port/user/names.txt") 
#本地指定目錄下的文件
val distFile3 = sc.textFile("file:/input/data.txt") 
#讀取多個文件
val distFile4 = sc.textFile("/input/data1.txt, /input/data2.txt") 
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
禁止轉載,如需轉載請通過簡信或評論聯系作者。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容