1.設(shè)備主題
1.1 活躍設(shè)備數(shù)(日、周、月)
需求定義:
日活:當(dāng)日活躍的設(shè)備數(shù)
周活:當(dāng)周活躍的設(shè)備數(shù)
月活:當(dāng)月活躍的設(shè)備數(shù)
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_uv_count;
CREATE external TABLE ads_uv_count (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`day_count` BIGINT COMMENT '當(dāng)日用戶數(shù)量',
`wk_count` BIGINT COMMENT '當(dāng)周用戶數(shù)量',
`mn_count` BIGINT COMMENT '當(dāng)月用戶數(shù)量',
`is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最終結(jié)果',
`is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最終結(jié)果'
) COMMENT '活躍設(shè)備數(shù)' ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_uv_count/';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_uv_count SELECT
'2020-03-10' dt,
daycount.ct,
wkcount.ct,
mncount.ct,
IF (
date_add(
next_day ('2020-03-10', 'MO') ,- 1
) = '2020-03-10',
'Y',
'N'
),
IF (
last_day('2020-03-10') = '2020-03-10',
'Y',
'N'
)
FROM
(
SELECT
'2020-03-10' dt,
count(*) ct
FROM
dwt_uv_topic
WHERE
login_date_last = '2020-03-10'
) daycount
JOIN (
SELECT
'2020-03-10' dt,
count(*) ct
FROM
dwt_uv_topic
WHERE
login_date_last >= date_add(
next_day ('2020-03-10', 'MO') ,- 7
)
AND login_date_last <= date_add(
next_day ('2020-03-10', 'MO') ,- 1
)
) wkcount ON daycount.dt = wkcount.dt
JOIN (
SELECT
'2020-03-10' dt,
count(*) ct
FROM
dwt_uv_topic
WHERE
date_format(login_date_last, 'yyyy-MM') = date_format('2020-03-10', 'yyyy-MM')
) mncount ON daycount.dt = mncount.dt;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_uv_count;
1.2 每日新增設(shè)備
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_new_mid_count;
CREATE external TABLE ads_new_mid_count (
`create_date` string COMMENT '創(chuàng)建時(shí)間',
`new_mid_count` BIGINT COMMENT '新增設(shè)備數(shù)量'
) COMMENT '每日新增設(shè)備信息數(shù)量'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_new_mid_count/';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_new_mid_count SELECT
login_date_first,
count(*)
FROM
dwt_uv_topic
WHERE
login_date_first = '2020-03-10'
GROUP BY
login_date_first;
3)查詢導(dǎo)入數(shù)據(jù)
hive (gmall)> select * from ads_new_mid_count;
1.3 沉默用戶數(shù)
需求定義:
沉默用戶:只在安裝當(dāng)天啟動(dòng)過,且啟動(dòng)時(shí)間是在 7 天前
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_silent_count;
CREATE external TABLE ads_silent_count (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`silent_count` BIGINT COMMENT '沉默設(shè)備數(shù)'
) ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_silent_count';
2)導(dǎo)入 2020-03-20 數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_silent_count SELECT
'2020-03-15',
count(*)
FROM
dwt_uv_topic
WHERE
login_date_first = login_date_last
AND login_date_last <= date_add('2020-03-15' ,- 7);
3)查詢導(dǎo)入數(shù)據(jù)
hive (gmall)> select * from ads_silent_count;
1.4 本周回流用戶數(shù)
需求定義:
本周回流用戶:上周未活躍,本周活躍的設(shè)備,且不是本周新增設(shè)備
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_back_count;
CREATE external TABLE ads_back_count (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`wk_dt` string COMMENT '統(tǒng)計(jì)日期所在周',
`wastage_count` BIGINT COMMENT '回流設(shè)備數(shù)'
) ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_back_count';
2)導(dǎo)入數(shù)據(jù):
hive (gmall) > INSERT INTO TABLE ads_back_count SELECT
'2020-03-15',
count(*)
FROM
(
SELECT
mid_id
FROM
dwt_uv_topic
WHERE
login_date_last >= date_add(
next_day ('2020-03-15', 'MO') ,- 7
)
AND login_date_last <= date_add(
next_day ('2020-03-15', 'MO') ,- 1
)
AND login_date_first < date_add(
next_day ('2020-03-15', 'MO') ,- 7
)
) current_wk
LEFT JOIN (
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('2020-03-15', 'MO') ,- 7 * 2
)
AND dt <= date_add(
next_day ('2020-03-15', 'MO') ,- 7 - 1
)
GROUP BY
mid_id
) last_wk ON current_wk.mid_id = last_wk.mid_id
WHERE
last_wk.mid_id IS NULL;
3)查詢結(jié)果
hive (gmall)> select * from ads_back_count;
1.5 流失用戶數(shù)
需求定義:
流失用戶:最近 7 天未活躍的設(shè)備
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_wastage_count;
CREATE external TABLE ads_wastage_count (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`wastage_count` BIGINT COMMENT '流失設(shè)備數(shù)'
) ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_wastage_count';
2)導(dǎo)入 2020-03-20 數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_wastage_count SELECT
'2020-03-20',
count(*)
FROM
(
SELECT
mid_id
FROM
dwt_uv_topic
WHERE
login_date_last <= date_add('2020-03-20' ,- 7)
GROUP BY
mid_id
) t1;
3)查詢結(jié)果
hive (gmall)> select * from ads_wastage_count;
1.6 留存率
需求定義

1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_user_retention_day_rate;
CREATE external TABLE ads_user_retention_day_rate (
`stat_date` string COMMENT '統(tǒng)計(jì)日期',
`create_date` string COMMENT '設(shè)備新增日期',
`retention_day` INT COMMENT '截止當(dāng)前日期留存天數(shù)',
`retention_count` BIGINT COMMENT '留存數(shù)量',
`new_mid_count` BIGINT COMMENT '設(shè)備新增數(shù)量',
`retention_ratio` DECIMAL (10, 2) COMMENT '留存率'
) COMMENT '每日用戶留存情況'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_user_retention_day_rate/';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_user_retention_day_rate SELECT
'2020-03-10' ,-- 統(tǒng)計(jì)日期 date_add('2020-03-10' ,- 1) ,-- 新增日期 1 ,-- 留存天數(shù) sum(
IF (
login_date_first = date_add('2020-03-10' ,- 1)
AND login_date_last = '2020-03-10',
1,
0
)
) ,-- 2020 - 03 - 09 的 1 日留存數(shù) sum(
IF (
login_date_first = date_add('2020-03-10' ,- 1),
1,
0
)
) ,-- 2020 - 03 - 09 新增 sum(
IF (
login_date_first = date_add('2020-03-10' ,- 1)
AND login_date_last = '2020-03-10',
1,
0
)
) / sum(
IF (
login_date_first = date_add('2020-03-10' ,- 1),
1,
0
)
) * 100
FROM
dwt_uv_topicunion ALL SELECT
'2020-03-10' ,-- 統(tǒng)計(jì)日期 date_add('2020-03-10' ,- 2) ,-- 新增日期 2 ,-- 留存天數(shù) sum(
IF (
login_date_first = date_add('2020-03-10' ,- 2)
AND login_date_last = '2020-03-10',
1,
0
)
) ,-- 2020 - 03 - 08 的 2 日留存數(shù) sum(
IF (
login_date_first = date_add('2020-03-10' ,- 2),
1,
0
)
) ,-- 2020 - 03 - 08 新增 sum(
IF (
login_date_first = date_add('2020-03-10' ,- 2)
AND login_date_last = '2020-03-10',
1,
0
)
) / sum(
IF (
login_date_first = date_add('2020-03-10' ,- 2),
1,
0
)
) * 100
FROM
dwt_uv_topic
UNION ALL
SELECT
'2020-03-10' ,-- 統(tǒng)計(jì)日期 date_add('2020-03-10' ,- 3) ,-- 新增日期 3 ,-- 留存天數(shù) sum(
IF (
login_date_first = date_add('2020-03-10' ,- 3)
AND login_date_last = '2020-03-10',
1,
0
)
) ,-- 2020 - 03 - 07 的 3 日留存數(shù) sum(
IF (
login_date_first = date_add('2020-03-10' ,- 3),
1,
0
)
) ,-- 2020 - 03 - 07 新增 sum(
IF (
login_date_first = date_add('2020-03-10' ,- 3)
AND login_date_last = '2020-03-10',
1,
0
)
) / sum(
IF (
login_date_first = date_add('2020-03-10' ,- 3),
1,
0
)
) * 100
FROM
dwt_uv_topic;
3)查詢導(dǎo)入數(shù)據(jù)
hive (gmall)>select * from ads_user_retention_day_rate;
1.7 最近連續(xù)三周活躍用戶數(shù)
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_continuity_wk_count;
CREATE external TABLE ads_continuity_wk_count (
`dt` string COMMENT '統(tǒng)計(jì)日期,一般用結(jié)束周周日日期,如果每天計(jì)算一次,可用當(dāng)天日 期',
`wk_dt` string COMMENT '持續(xù)時(shí)間',
`continuity_count` BIGINT COMMENT '活躍次數(shù)'
) ROW format delimited FIELDS TERMINATED BY '\t' location '/warehouse/gmall/ads/ads_continuity_wk_count';
2)導(dǎo)入 2020-03-20 所在周的數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_continuity_wk_count SELECT
'2020-03-15',
concat(
date_add(
next_day ('2020-03-15', 'MO') ,- 7 * 3
),
'_',
date_add(
next_day (' 2020-03-15', 'MO') ,- 1
)
),
count(*)
FROM
(
SELECT
mid_id
FROM
(
selectmid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('2020-03-10', 'monday') ,- 7
)
AND dt <= date_add(
next_day ('2020-03-10', 'monday') ,- 1
)
GROUP BY
mid_id
UNION ALL
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('2020-03-10', 'monday') ,- 7 * 2
)
AND dt <= date_add(
next_day ('2020-03-10', 'monday') ,- 7 - 1
)
GROUP BY
mid_id
UNION ALL
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('2020-03-10', 'monday') ,- 7 * 3
)
AND dt <= date_add(
next_day ('2020-03-10', 'monday') ,- 7 * 2 - 1
)
GROUP BY
mid_id
) t1
GROUP BY
mid_id
HAVING
count(*) = 3
) t2
3)查詢
hive (gmall)> select * from ads_continuity_wk_count;
1.8 最近七天內(nèi)連續(xù)三天活躍用戶數(shù)
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_continuity_uv_count;
CREATE external TABLE ads_continuity_uv_count (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`wk_dt` string COMMENT '最近 7 天日期',
`continuity_count` BIGINT
) COMMENT '連續(xù)活躍設(shè)備數(shù)'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_continuity_uv_count';
2)寫出導(dǎo)入數(shù)據(jù)的 SQL 語句
2)寫出導(dǎo)入數(shù)據(jù)的 SQL 語句 hive (gmall) > INSERT INTO TABLE ads_continuity_uv_count SELECT
'2020-03-12',
concat(
date_add('2020-03-12' ,- 6),
'_',
'2020-03-12'
),
count(*)
FROM
(
SELECT
mid_id
FROM
(
SELECT
mid_id
FROM
(
SELECT
mid_id,
date_sub(dt, rank) date_dif
FROM
(
SELECT
mid_id,
dt,
rank () over (PARTITION BY mid_id ORDER BY dt) rank
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add('2020-03-12' ,- 6)
AND dt <= '2020-03-12'
) t1
) t2
GROUP BY
mid_id,
date_dif
HAVING
count(*) >= 3
) t3
GROUP BY
mid_id
) t4;
3)查詢
hive (gmall)> select * from ads_continuity_uv_count;
2.會(huì)員主題
2.1 會(huì)員主題信息
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_user_topic;
CREATE external TABLE ads_user_topic (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`day_users` string COMMENT '活躍會(huì)員數(shù)',
`day_new_users` string COMMENT '新增會(huì)員數(shù)',
`day_new_payment_users` string COMMENT '新增消費(fèi)會(huì)員數(shù)',
`payment_users` string COMMENT '總付費(fèi)會(huì)員數(shù)',
`users` string COMMENT '總會(huì)員數(shù)',
`day_users2users` DECIMAL (10, 2) COMMENT '會(huì)員活躍率',
`payment_users2users` DECIMAL (10, 2) COMMENT '會(huì)員付費(fèi)率',
`day_new_users2users` DECIMAL (10, 2) COMMENT '會(huì)員新鮮度'
) COMMENT '會(huì)員主題信息表'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_user_topic';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_user_topic SELECT
'2020-03-10',
sum(
IF (
login_date_last = '2020-03-10',
1,
0
)
),
sum(
IF (
login_date_first = '2020-03-10',
1,
0
)
),
sum(
IF (
payment_date_first = '2020-03-10',
1,
0
)
),
sum(IF(payment_count > 0, 1, 0)),
count(*),
sum(
IF (
login_date_last = '2020-03-10',
1,
0
)
) / count(*),
sum(IF(payment_count > 0, 1, 0)) / count(*),
sum(
IF (
login_date_first = '2020-03-10',
1,
0
)
) / sum(
IF (
login_date_last = '2020-03-10',
1,
0
)
)
FROM
dwt_user_topic
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_user_topic;
4)在/home/atguigu/bin 目錄下編寫腳本 ads_user_topic.sh
[atguigu@hadoop102 bin]$ vim ads_user_topic.sh
添加如下內(nèi)容:
#!/bin/bash
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是輸入的日期按照取輸入日期;如果沒輸入日期取當(dāng)前時(shí)間的前一天
if [ -n "$1" ] ;
then
do
_date=$1
else
do
_date=`date -d "-1 day" +%F`
fisql="
WITH tmp_day_users AS (
SELECT
'$do_date' dt,
count(*) day_users from$ { APP }.dwt_user_topic wherelogin_date_last = '$do_date'
),
tmp_day_new_users AS (
SELECT
'$do_date' dt,
count(*) day_new_users from$ { APP }.dwt_user_topic wherelogin_date_last = '$do_date'
AND login_date_first = '$do_date'
),
tmp_day_new_payment_users AS (
SELECT
'$do_date' dt,
count(*) day_new_payment_users from$ { APP }.dwt_user_topic wherepayment_date_first = '$do_date'
),
tmp_payment_users AS (
SELECT
'$do_date' dt,
count(*) payment_users from$ { APP }.dwt_user_topic wherepayment_date_first IS NOT NULL
),
tmp_users AS (
SELECT
'$do_date' dt,
count(*) users from$ { APP }.dwt_user_topic tmp_users
) INSERT INTO TABLE $ { APP }.ads_user_topic SELECT
'$do_date' dt,
day_users,
day_new_users,
day_new_payment_users,
payment_users,
users,
day_users / users,
payment_users / users,
day_new_users / users fromtmp_day_users
JOIN tmp_day_new_users ON tmp_day_users.dt = tmp_day_new_users.dt
JOIN tmp_day_new_payment_users ON tmp_day_users.dt = tmp_day_new_payment_users.dt
JOIN tmp_payment_users ON tmp_day_users.dt = tmp_payment_users.dt
JOIN tmp_users ontmp_day_users.dt = tmp_users.dt;
"
$hive -e "$sql"
5)增加腳本執(zhí)行權(quán)限
[atguigu@hadoop102 bin]$ chmod 777 ads_user_topic.sh
6)執(zhí)行腳本導(dǎo)入數(shù)據(jù)
[atguigu@hadoop102 bin]$ ads_user_topic.sh 2020-03-11
7)查看導(dǎo)入數(shù)據(jù)
hive (gmall)> select * from ads_user_topic;
2.2 漏斗分析
統(tǒng)計(jì)“瀏覽->購物車->下單->支付”的轉(zhuǎn)化率
思路:統(tǒng)計(jì)各個(gè)行為的人數(shù),然后計(jì)算比值。
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_user_action_convert_day;
CREATE external TABLE ads_user_action_convert_day (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`total_visitor_m_count` BIGINT COMMENT '總訪問人數(shù)',
`cart_u_count` BIGINT COMMENT '加入購物車的人數(shù)',
`visitor2cart_convert_ratio` DECIMAL (10, 2) COMMENT '訪問到加入購物車轉(zhuǎn)化率',
`order_u_count` BIGINT COMMENT '下單人數(shù)',
`cart2order_convert_ratio` DECIMAL (10, 2) COMMENT '加入購物車到下單轉(zhuǎn)化率',
`payment_u_count` BIGINT COMMENT '支付人數(shù)',
`order2payment_convert_ratio` DECIMAL (10, 2) COMMENT '下單到支付的轉(zhuǎn)化率'
) COMMENT '用戶行為漏斗分析'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_user_action_convert_day/';
2)數(shù)據(jù)裝載
hive (gmall) > INSERT INTO TABLE ads_user_action_convert_day SELECT
'2020-03-10',
uv.day_count,
ua.cart_count,
cast(
ua.cart_count / uv.day_count AS DECIMAL (10, 2)
) visitor2cart_convert_ratio,
ua.order_count,
cast(
ua.order_count / ua.cart_count AS DECIMAL (10, 2)
) visitor2order_convert_ratio,
ua.payment_count,
cast(
ua.payment_count / ua.order_count AS DECIMAL (10, 2)
) order2payment_convert_ratio
FROM
(
SELECT
dt,
sum(IF(cart_count > 0, 1, 0)) cart_count,
sum(IF(order_count > 0, 1, 0)) order_count,
sum(IF(payment_count > 0, 1, 0)) payment_count
FROM
dws_user_action_daycount
WHERE
dt = '2020-03-10'
GROUP BY
dt
) ua
JOIN ads_uv_count uv ON uv.dt = ua.dt;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_user_action_convert_day;
3.商品主題
3.1 商品個(gè)數(shù)信息
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_product_info;
CREATE external TABLE ads_product_info (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`sku_num` string COMMENT 'sku 個(gè)數(shù)',
`spu_num` string COMMENT 'spu 個(gè)數(shù)'
) COMMENT '商品個(gè)數(shù)信息'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_product_info';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_product_infoselect '2020-03-10' dt,
sku_num,
spu_num
FROM
(
SELECT
'2020-03-10' dt,
count(*) sku_num fromdwt_sku_topic
) tmp_sku_num
JOIN (
SELECT
'2020-03-10' dt,
count(*) spu_num
FROM
(
SELECT
spu_id fromdwt_sku_topic
GROUP BY
spu_id
) tmp_spu_id
) tmp_spu_num ON tmp_sku_num.dt = tmp_spu_num.dt;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_product_info;
3.2 商品銷量排名
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_product_sale_topN;
CREATE external TABLE ads_product_sale_topN (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`sku_id` string COMMENT '商品 ID',
`payment_amount` BIGINT COMMENT '銷量'
) COMMENT '商品個(gè)數(shù)信息'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_product_sale_topN';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_product_sale_topN SELECT
'2020-03-10' dt,
sku_id,
payment_amount fromdws_sku_action_daycount wheredt = '2020-03-10'
ORDER BY
payment_amount DESC
LIMIT 10;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_product_sale_topN;
3.3 商品收藏排名
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_product_favor_topN;
CREATE external TABLE ads_product_favor_topN (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`sku_id` string COMMENT '商品 ID',
`favor_count` BIGINT COMMENT '收藏量'
) COMMENT '商品收藏 TopN'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_product_favor_topN';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_product_favor_topN SELECT
'2020-03-10' dt,
sku_id,
favor_count from
dws_sku_action_daycount wheredt = '2020-03-10'
ORDER BY
favor_count DESC
LIMIT 10;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_product_favor_topN;
3.4 商品加入購物車排名
1)建表語句
DROP TABLE
IF EXISTS ads_product_cart_topN;
CREATE external TABLE ads_product_cart_topN (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`sku_id` string COMMENT '商品 ID',
`cart_num` BIGINT COMMENT '加入購物車數(shù)量'
) COMMENT '商品加入購物車 TopN'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_product_cart_topN';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_product_cart_topN SELECT
'2020-03-10' dt,
sku_id,
cart_num
FROM
dws_sku_action_daycount wheredt = '2020-03-10'
ORDER BY
cart_num DESC
LIMIT 10;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_product_cart_topN;
3.5 商品退款率排名(最近 30 天)
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_product_refund_topN;
CREATE external TABLE ads_product_refund_topN (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`sku_id` string COMMENT '商品 ID',
`refund_ratio` DECIMAL (10, 2) COMMENT '退款率'
) COMMENT '商品退款率 TopN'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_product_refund_topN';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_product_refund_topN SELECT
'2020-03-10',
sku_id,
refund_last_30d_count / payment_last_30d_count * 100 refund_ratio
FROM
dwt_sku_topic
ORDER BY
refund_ratio DESC
LIMIT 10;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_product_refund_topN;
3.6 商品差評率
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_appraise_bad_topN;
CREATE external TABLE ads_appraise_bad_topN (
`dt` string COMMENT '統(tǒng)計(jì)日期',
`sku_id` string COMMENT '商品 ID',
`appraise_bad_ratio` DECIMAL (10, 2) COMMENT '差評率'
) COMMENT '商品差評率 TopN'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_appraise_bad_topN';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_appraise_bad_topN SELECT
'2020-03-10' dt,
sku_id,
appraise_bad_count / (
appraise_good_count + appraise_mid_count + appraise_bad_coun t + appraise_default_count
) appraise_bad_ratio fromdws_sku_action_daycount
WHERE
dt = '2020-03-10'
ORDER BY
appraise_bad_ratio DESC
LIMIT 10;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_appraise_bad_topN;
4.營銷主題(用戶+商品+購買行為)
4.1 下單數(shù)目統(tǒng)計(jì)
需求分析:統(tǒng)計(jì)每日下單數(shù),下單金額及下單用戶數(shù)。
hive (gmall) > DROP TABLE
IF EXISTS ads_order_daycount;
CREATE external TABLE ads_order_daycount (
dt string COMMENT '統(tǒng)計(jì)日期',
order_count BIGINT COMMENT '單日下單筆數(shù)',
order_amount BIGINT COMMENT '單日下單金額',
order_users BIGINT COMMENT '單日下單用戶數(shù)'
) COMMENT '每日訂單總計(jì)表'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_order_daycount';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_order_daycount SELECT
'2020-03-10',
sum(order_count),
sum(order_amount),
sum(IF(order_count > 0, 1, 0))
FROM
dws_user_action_daycount
WHERE
dt = '2020-03-10';
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_order_daycount;
4.2 支付信息統(tǒng)計(jì)
每日支付金額、支付人數(shù)、支付商品數(shù)、支付筆數(shù)以及下單到支付的平均時(shí)長(取自 DWD)
1)建表語句
hive (gmall) > DROP TABLE
IF EXISTS ads_payment_daycount;
CREATE external TABLE ads_payment_daycount (
dt string COMMENT '統(tǒng)計(jì)日期',
order_count BIGINT COMMENT '單日支付筆數(shù)',
order_amount BIGINT COMMENT '單日支付金額',
payment_user_count BIGINT COMMENT '單日支付人數(shù)',
payment_sku_count BIGINT COMMENT '單日支付商品數(shù)',
payment_avg_time DOUBLE COMMENT '下單到支付的平均時(shí)長,取分鐘數(shù)'
) COMMENT '每日訂單總計(jì)表'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_payment_daycount';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_payment_daycount SELECT
tmp_payment.dt,
tmp_payment.payment_count,
tmp_payment.payment_amount,
tmp_payment.payment_user_count,
tmp_skucount.payment_sku_count,
tmp_time.payment_avg_time
FROM
(
SELECT
'2020-03-15' dt,
sum(payment_count) payment_count,
sum(payment_amount) payment_amount,
sum(IF(payment_count > 0, 1, 0)) payment_user_count
FROM
dws_user_action_daycount
WHERE
dt = '2020-03-15'
) tmp_payment
JOIN (
SELECT
'2020-03-15' dt,
sum(IF(payment_count > 0, 1, 0)) payment_sku_count
FROM
dws_sku_action_daycount
WHERE
dt = '2020-03-15'
) tmp_skucount ON tmp_payment.dt = tmp_skucount.dt
JOIN (
SELECT
'2020-03-15' dt,
sum(
unix_timestamp(payment_time) - unix_timestamp(create_time)
) / count(*) / 60 payment_avg_time
FROM
dwd_fact_order_info
WHERE
dt = '2020-03-15'
AND payment_time IS NOT NULL
) tmp_time ON tmp_payment.dt = tmp_time.dt
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_payment_daycount;
4.3 復(fù)購率
1)建表語句
hive (gmall) > DROP TABLE ads_sale_tm_category1_stat_mn;
CREATE external TABLE ads_sale_tm_category1_stat_mn (
tm_id string COMMENT '品牌 id',
category1_id string COMMENT '1 級品類 id ',
category1_name string COMMENT '1 級品類名稱 ',
buycount BIGINT COMMENT '購買人數(shù)',
buy_twice_last BIGINT COMMENT '兩次以上購買人數(shù)',
buy_twice_last_ratio DECIMAL (10, 2) COMMENT '單次復(fù)購率',
buy_3times_last BIGINT COMMENT '三次以上購買人數(shù)',
buy_3times_last_ratio DECIMAL (10, 2) COMMENT '多次復(fù)購率',
stat_mn string COMMENT '統(tǒng)計(jì)月份',
stat_date string COMMENT '統(tǒng)計(jì)日期'
) COMMENT '復(fù)購率統(tǒng)計(jì)'
ROW format delimited FIELDS TERMINATED BY '\t'
location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';
2)導(dǎo)入數(shù)據(jù)
hive (gmall) > INSERT INTO TABLE ads_sale_tm_category1_stat_mn selectmn.sku_tm_id,
mn.sku_category1_id,
mn.sku_category1_name,
sum(IF(mn.order_count >= 1, 1, 0)) buycount,
sum(IF(mn.order_count >= 2, 1, 0)) buyTwiceLast,
sum(IF(mn.order_count >= 2, 1, 0)) / sum(IF(mn.order_count >= 1, 1, 0)) buyTwiceLastRatio,
sum(IF(mn.order_count >= 3, 1, 0)) buy3timeLast,
sum(IF(mn.order_count >= 3, 1, 0)) / sum(IF(mn.order_count >= 1, 1, 0)) buy3timeLastRatio,
date_format('2019-02-10', 'yyyy-MM') stat_mn,
'2019-02-10' stat_date
FROM
(
SELECT
user_id,
sd.sku_tm_id,
sd.sku_category1_id,
sd.sku_category1_name,
sum(order_count) order_count
FROM
dws_sale_detail_daycount sd
WHERE
date_format(dt, 'yyyy-MM') = date_format('2019-02-10', 'yyyy-MM')
GROUP BY
user_id,
sd.sku_tm_id,
sd.sku_category1_id,
sd.sku_category1_name
) mn
GROUP BY
mn.sku_tm_id,
mn.sku_category1_id,
mn.sku_category1_name;
3)查詢導(dǎo)入結(jié)果
hive (gmall)> select * from ads_sale_tm_category1_stat_mn;
5.ADS 層導(dǎo)入腳本
1)在/home/atguigu/bin 目錄下創(chuàng)建腳本 dwt_to_ads.sh
[atguigu@hadoop102 bin]$ vim dwt_to_ads.sh
在腳本中填寫如下內(nèi)容
#!/bin/bash
hive=/opt/module/hive/bin/hive
# 如果是輸入的日期按照取輸入日期;如果沒輸入日期取當(dāng)前時(shí)間的前一天
if [ -n "$1" ] ;
then
do
_date=$1
else
do_date=`date -d "-1 day" +%F`
fisql="use gmall;
INSERT INTO TABLE ads_uv_count SELECT
'$do_date',
sum(
IF (
login_date_last = '$do_date',
1,
0
)
),
sum(
IF (
login_date_last >= date_add(
next_day ('$do_date', 'monday') ,- 7
)
AND login_date_last <= date_add(
next_day ('$do_date', 'monday') ,- 1
),
1,
0
)
),
sum(
IF (
date_format(login_date_last, 'yyyy-MM') = date_format('$do_date', 'yyyy-M M'),
1,
0
)
),
IF (
'$do_date' = date_add(
next_day ('$do_date', 'monday') ,- 1
),
'Y',
'N'
),
IF (
'$do_date' = last_day('$do_date'),
'Y',
'N'
)
FROM
dwt_uv_topic;
INSERT INTO TABLE ads_new_mid_count SELECT
'$do_date',
count(*)
FROM
dwt_uv_topic
WHERE
login_date_first = '$do_date';
INSERT INTO TABLE ads_silent_count SELECT
'$do_date',
count(*)
FROM
dwt_uv_topic
WHERE
login_date_first = login_date_last
AND login_date_last <= date_add('$do_date' ,- 7);
INSERT INTO TABLE ads_back_count SELECT
'$do_date',
concat(
date_add(
next_day ('2020-03-10', 'MO') ,- 7
),
'_',
date_add(
next_day ('2020- 03-10', 'MO') ,- 1
)
),
count(*)
FROM
(
SELECT
mid_id
FROM
dwt_uv_topic
WHERE
login_date_last >= date_add(
next_day ('$do_date', 'MO') ,- 7
)
AND login_date_last <= date_add(
next_day ('$do_date', 'MO') ,- 1
)
AND login_date_first < date_add(
next_day ('$do_date', 'MO') ,- 7
)
) current_wk
LEFT JOIN (
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('$do_date', 'MO') ,- 7 * 2
)
AND dt <= date_add(
next_day ('$do_date', 'MO') ,- 7 - 1
)
GROUP BY
mid_id
) last_wk ON current_wk.mid_id = last_wk.mid_id
WHERE
last_wk.mid_id IS NULL;
INSERT INTO TABLE ads_wastage_count SELECT
'$do_date',
count(*)
FROM
dwt_uv_topic
WHERE
login_date_last <= date_add('$do_date' ,- 7);
INSERT INTO TABLE ads_user_retention_day_rate SELECT
'$do_date',
date_add('$do_date' ,- 3),
3,
sum(
IF (
login_date_first = date_add('$do_date' ,- 3)
AND login_date_last = '$do_date',
1,
0
)
),
sum(
IF (
login_date_first = date_add('$do_date' ,- 3),
1,
0
)
),
sum(
IF (
login_date_first = date_add('$do_date' ,- 3)
AND login_date_last = '$do_date',
1,
0
)
) / sum(
IF (
login_date_first = date_add('$do_date', - 3),
1,
0
)
) * 100
FROM
dwt_uv_topic
UNION ALL
SELECT
'$do_date',
date_add('$do_date' ,- 2),
2,
sum(
IF (
login_date_first = date_add('$do_date' ,- 2)
AND login_date_last = '$do_date',
1,
0
)
),
sum(
IF (
login_date_first = date_add('$do_date' ,- 2),
1,
0
)
),
sum(
IF (
login_date_first = date_add('$do_date' ,- 2)
AND login_date_last = '$do_date',
1,
0
)
) / sum(
IF (
login_date_first = date_add('$do_date', - 2),
1,
0
)
) * 100
FROM
dwt_uv_topic
UNION ALL
SELECT
'$do_date',
date_add('$do_date' ,- 1),
1,
sum(
IF (
login_date_first = date_add('$do_date' ,- 1)
AND login_date_last = '$do_date',
1,
0
)
),
sum(
IF (
login_date_first = date_add('$do_date' ,- 1),
1,
0
)
),
sum(
IF (
login_date_first = date_add('$do_date' ,- 1)
AND login_date_last = '$do_date',
1,
0
)
) / sum(
IF (
login_date_first = date_add('$do_date', - 1),
1,
0
)
) * 100
FROM
dwt_uv_topic;
INSERT INTO TABLE ads_continuity_wk_count SELECT
'$do_date',
concat(
date_add(
next_day ('$do_date', 'MO') ,- 7 * 3
),
'_',
date_add(
next_day ('$do_d ate', 'MO') ,- 1
)
),
count(*)
FROM
(
SELECT
mid_id
FROM
(
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('$do_date', 'monday') ,- 7
)
AND dt <= date_add(
next_day ('$do_date', 'monday') ,- 1
)
GROUP BY
mid_id
UNION ALL
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('$do_date', 'monday') ,- 7 * 2
)
AND dt <= date_add(
next_day ('$do_date', 'monday') ,- 7 - 1
)
GROUP BY
mid_id
UNION ALL
SELECT
mid_id
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add(
next_day ('$do_date', 'monday') ,- 7 * 3
)
AND dt <= date_add(
next_day ('$do_date', 'monday') ,- 7 * 2 - 1
)
GROUP BY
mid_id
) t1
GROUP BY
mid_id
HAVING
count(*) = 3
) t2;
INSERT INTO TABLE ads_continuity_uv_count SELECT
'$do_date',
concat(
date_add('$do_date' ,- 6),
'_',
'$do_date'
),
count(*)
FROM
(
SELECT
mid_id
FROM
(
SELECT
mid_id
FROM
(
SELECT
mid_id,
date_sub(dt, rank) date_dif
FROM
(
SELECT
mid_id,
dt,
rank () over (PARTITION BY mid_id ORDER BY dt) rank
FROM
dws_uv_detail_daycount
WHERE
dt >= date_add('$do_date' ,- 6)
AND dt <= '$do_date'
) t1
) t2
GROUP BY
mid_id,
date_dif
HAVING
count(*) >= 3
) t3
GROUP BY
mid_id
) t4;
INSERT INTO TABLE ads_user_topic SELECT
'$do_date',
sum(
IF (
login_date_last = '$do_date',
1,
0
)
),
sum(
IF (
login_date_first = '$do_date',
1,
0
)
),
sum(
IF (
payment_date_first = '$do_date',
1,
0
)
),
sum(IF(payment_count > 0, 1, 0)),
count(*),
sum(
IF (
login_date_last = '$do_date',
1,
0
)
) / count(*),
sum(IF(payment_count > 0, 1, 0)) / count(*),
sum(
IF (
login_date_first = '$do_date',
1,
0
)
) / sum(
IF (
login_date_last = '$do_date',
1,
0
)
)
FROM
dwt_user_topic;
INSERT INTO TABLE ads_user_action_convert_day SELECT
'$do_date',
uv.day_count,
ua.cart_count,
ua.cart_count / uv.day_count * 100 visitor2cart_convert_ratio,
ua.order_count,
ua.order_count / ua.cart_count * 100 visitor2order_convert_ratio,
ua.payment_count,
ua.payment_count / ua.order_count * 100 order2payment_convert_ratio
FROM
(
SELECT
'$do_date' dt,
sum(IF(cart_count > 0, 1, 0)) cart_count,
sum(IF(order_count > 0, 1, 0)) order_count,
sum(IF(payment_count > 0, 1, 0)) payment_count
FROM
dws_user_action_daycount
WHERE
dt = '$do_date'
) ua
JOIN ads_uv_count uv ON uv.dt = ua.dt;
INSERT INTO TABLE ads_product_info SELECT
'$do_date' dt,
sku_num,
spu_num
FROM
(
SELECT
'$do_date' dt,
count(*) sku_num fromdwt_sku_topic
) tmp_sku_num
JOIN (
SELECT
'$do_date' dt,
count(*) spu_num
FROM
(
SELECT
spu_id fromdwt_sku_topic
GROUP BY
spu_id
) tmp_spu_id
) tmp_spu_num ON tmp_sku_num.dt = tmp_spu_num.dt;
INSERT INTO TABLE ads_product_sale_topN SELECT
'$do_date',
sku_id,
payment_amount
FROM
dws_sku_action_daycount
WHERE
dt = '$do_date'
ORDER BY
payment_amount DESC
LIMIT 10;
INSERT INTO TABLE ads_product_favor_topN SELECT
'$do_date',
sku_id,
favor_count
FROM
dws_sku_action_daycount
WHERE
dt = '$do_date'
ORDER BY
favor_count
LIMIT 10;
INSERT INTO TABLE ads_product_cart_topN SELECT
'$do_date' dt,
sku_id,
cart_num
FROM
dws_sku_action_daycount
WHERE
dt = '$do_date'
ORDER BY
cart_num
LIMIT 10;
INSERT INTO TABLE ads_product_refund_topN SELECT
'$do_date',
sku_id,
refund_last_30d_count / payment_last_30d_count * 100 refund_ratio
FROM
dwt_sku_topic
ORDER BY
refund_ratio DESC
LIMIT 10;
INSERT INTO TABLE ads_appraise_bad_topN SELECT
'$do_date' dt,
sku_id,
appraise_bad_count / (
appraise_bad_count + appraise_good_count + appraise_mid_coun t + appraise_default_count
) * 100 appraise_bad_ratio
FROM
dws_sku_action_daycount
WHERE
dt = '$do_date'
ORDER BY
appraise_bad_ratio DESC
LIMIT 10;
INSERT INTO TABLE ads_order_daycount SELECT
'$do_date',
sum(order_count),
sum(order_amount),
sum(IF(order_count > 0, 1, 0))
FROM
dws_user_action_daycount
WHERE
dt = '$do_date';
INSERT INTO TABLE ads_payment_daycount SELECT
tmp_payment.dt,
tmp_payment.payment_count,
tmp_payment.payment_amount,
tmp_payment.payment_user_count,
tmp_skucount.payment_sku_count,
tmp_time.payment_avg_time
FROM
(
SELECT
'$do_date' dt,
sum(payment_count) payment_count,
sum(payment_amount) payment_amount,
sum(IF(payment_count > 0, 1, 0)) payment_user_count
FROM
dws_user_action_daycount
WHERE
dt = '$do_date'
) tmp_payment
JOIN (
SELECT
'$do_date' dt,
sum(IF(payment_count > 0, 1, 0)) payment_sku_count
FROM
dws_sku_action_daycount
WHERE
dt = '$do_date'
) tmp_skucount ON tmp_payment.dt = tmp_skucount.dt
JOIN (
SELECT
'$do_date' dt,
sum(
unix_timestamp(payment_time) - unix_timestamp(create_time)
) / count(*) / 60 payment_avg_time
FROM
dwd_fact_order_info
WHERE
dt = '$do_date'
AND payment_time IS NOT NULL
) tmp_time ON tmp_payment.dt = tmp_time.dt;
INSERT INTO TABLE ads_sale_tm_category1_stat_mn SELECT
mn.sku_tm_id,
mn.sku_category1_id,
mn.sku_category1_name,
sum(IF(mn.order_count >= 1, 1, 0)) buycount,
sum(IF(mn.order_count >= 2, 1, 0)) buyTwiceLast,
sum(IF(mn.order_count >= 2, 1, 0)) / sum(IF(mn.order_count >= 1, 1, 0)) buyTwiceLastRatio,
sum(IF(mn.order_count >= 3, 1, 0)) buy3timeLast,
sum(IF(mn.order_count >= 3, 1, 0)) / sum(IF(mn.order_count >= 1, 1, 0)) buy3timeLastRatio,
date_format('$do_date', 'yyyy-MM') stat_mn,
'$do_date' stat_date
FROM
(
SELECT
user_id,
sd.sku_tm_id,
sd.sku_category1_id,
sd.sku_category1_name,
sum(order_count) order_count
FROM
dws_sale_detail_daycount sd
WHERE
date_format(dt, 'yyyy-MM') = date_format('$do_date', 'yyyy-MM')
GROUP BY
user_id,
sd.sku_tm_id,
sd.sku_category1_id,
sd.sku_category1_name
) mn
GROUP BY
mn.sku_tm_id,
mn.sku_category1_id,
mn.sku_category1_name;
"
$hive -e "$sql"