0%

六、暑假作业

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
create database ods_edu;
use ods_edu;
-- create table if not exists

create database dw_edu;
create database app_edu;


create table if not exists dw_edu.t_edu_chat
(
id int comment '主键',
create_date_time timestamp comment '数据创建时间',
sid string comment '访客id',
create_time timestamp comment '会话创建时间',
seo_source string comment '搜索来源',
seo_keywords string comment '关键字',
ip string comment 'IP地址',
area string comment '地域',
country string comment '所在国家',
province string comment '省',
city string comment '城市',
origin_channel string comment '投放渠道',
`user` string comment '所属坐席',

manual_time timestamp comment '人工开始时间',

begin_time timestamp comment '坐席领取时间 ',

end_time timestamp comment '会话结束时间',

last_customer_msg_time_stamp timestamp comment '客户最后一条消息的时间',

last_agent_msg_time_stamp timestamp comment '坐席最后一下回复的时间',


reply_msg_count int comment '客服回复消息数',
msg_count int comment '客户发送消息数',
browser_name string comment '浏览器名称',
os_info string comment '系统名称'
) partitioned by (dt string comment '2019')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';


insert overwrite table dw_edu.t_edu_chat partition (dt = '2019')
select id,
create_date_time,
sid,
create_time,

seo_source comment,
seo_keywords,
ip,
area,
country,
province,
city,
origin_channel,
`user`,
manual_time,

begin_time,

end_time,

last_customer_msg_time_stamp,

last_agent_msg_time_stamp,

reply_msg_count,
msg_count,
browser_name,
os_info
from web_chat_ems_2019_07;


--1.总访问客户量
-- 1.1年日
create table if not exists app_edu.total_year
(
date_year string comment '年份',
date_day string comment '日',
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.total_year partition (dt ='2019')
SELECT
YEAR(create_time) AS `年份`,
DAY(create_time) AS `日`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) BETWEEN '2019' AND '2020'
GROUP BY YEAR(create_time), DAY(create_time)
ORDER BY YEAR(create_time), DAY(create_time);
--1.2季度日
create table if not exists app_edu.total_quater
(
date_year string comment '年份',
date_quater string comment '季度',
date_day string comment '日',
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.total_quater partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
QUARTER(create_time) AS `季度`,
DAY(create_time) AS `日期`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
AND QUARTER(create_time) BETWEEN 1 AND 4
GROUP BY YEAR(create_time), QUARTER(create_time), DAY(create_time)
ORDER BY YEAR(create_time), QUARTER(create_time), DAY(create_time);


--1.3月日
create table if not exists app_edu.total_month
(
date_year string comment '年份',
date_month string comment '月份',
date_day string comment '日',
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.total_month partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time)
ORDER BY YEAR(create_time), MONTH(create_time), DAY(create_time);
-- 2 地区独立访客热力图
-- 2.1.1 地区日
create table if not exists app_edu.area_day
(
date_year string comment '年份',
date_day string comment '日',
province string ,
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';


insert overwrite table app_edu.area_day partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
DAY(create_time) AS `日`,
province AS `省份`,
COUNT(DISTINCT sid) AS `访问量`
FROM dw_edu.t_edu_chat
WHERE create_time is not null
and YEAR(create_time) BETWEEN '2019' AND '2020'
GROUP BY YEAR(create_time), DAY(create_time), province
ORDER BY YEAR(create_time), DAY(create_time), province;
-- 2.2 地区月

--2.2季度日
SELECT YEAR(create_time) AS `年份`,
QUARTER(create_time) AS `季度`,
DAY(create_time) AS `日期`,
province AS `省份`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
AND QUARTER(create_time) BETWEEN 1 AND 4
GROUP BY YEAR(create_time), QUARTER(create_time), DAY(create_time), province
ORDER BY YEAR(create_time), QUARTER(create_time), DAY(create_time), province;

--2.3 month
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
province,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time), province
ORDER BY YEAR(create_time), MONTH(create_time), DAY(create_time), province;


-- SELECT province, COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) AS `咨询人数`
-- FROM dw_edu.t_edu_chat
-- GROUP BY province;


-- 3. 访客咨询率趋势
create table if not exists app_edu.click_tender
(
date_year string comment '年份',
date_month string ,
date_day string,
province string ,
con_cnt int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.click_tender partition (dt ='2019')
SELECT
YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
province, COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) AS `咨询人数`,
concat(round(COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) / total.total_consultations * 100, 2), '%') as `百分比`
FROM dw_edu.t_edu_chat
CROSS JOIN (SELECT COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) AS total_consultations FROM dw_edu.t_edu_chat) AS total
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time), province, total.total_consultations;

--4. 客户访问量和访客咨询率双轴趋势
-- concat(round( COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) / COUNT(DISTINCT sid) * 100, 2), '%') as `百分比`
create table if not exists app_edu.vis_tender
(
date_year string comment '年份',
date_month string ,
date_day string,
con_cnt int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.vis_tender partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
COUNT(DISTINCT sid) AS `客户访问量`,
concat(round( COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) / COUNT(DISTINCT sid) * 100, 2), '%') AS `咨询率`
FROM dw_edu.t_edu_chat
-- WHERE create_time BETWEEN '2019-01-01' AND '2020' -- 替换为实际的日期范围
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time);
-- 5. 时间段访问客户量趋势
-- 指定时间段的起始日期和结束日期
-- DECLARE @start_date DATE = '开始日期';
-- DECLARE @end_date DATE = '结束日期';

-- 查询指定时间段内每个小时的访问客户量
create table if not exists app_edu.vis_per_hour
(

date_hour string,
con_cnt int

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.vis_per_hour partition (dt ='2019')
SELECT HOUR(create_time) AS `小时`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
-- WHERE create_date_time >= @start_date AND create_date_time <= @end_date
GROUP BY HOUR(create_time)
ORDER BY HOUR(create_time);
-- 6.
-- 查询指定时间段内不同来源渠道和小时的访问客户量
create table if not exists app_edu.dif_channel
(
chennel_c string,
visiting int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.dif_channel partition (dt ='2019')
SELECT origin_channel AS `来源渠道`,
COUNT(DISTINCT sid) AS `访问客户量`,
concat(round( COUNT(DISTINCT sid) * 100.0 / (
SELECT COUNT(DISTINCT sid)
FROM dw_edu.t_edu_chat

) , 2), '%') AS `占比`
FROM dw_edu.t_edu_chat
GROUP BY origin_channel
ORDER BY origin_channel;
-- 7.
-- 指定时间段的起始日期和结束日期
create table if not exists app_edu.dif_source
(
source_c string,
visit_count int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.dif_source partition (dt ='2019')
SELECT
seo_source,
COUNT(DISTINCT sid) AS visitor_count,
concat(round( COUNT(DISTINCT sid) * 100.0 / (
SELECT COUNT(DISTINCT sid)
FROM dw_edu.t_edu_chat

) , 2), '%') AS visitor_ratio
FROM dw_edu.t_edu_chat
GROUP BY seo_source;



-- 8.
-- 查询访问客户量最多的页面排行榜
create table if not exists app_edu.top_web
(
date_year string comment '年份',
date_month string ,
date_day string,
brouse_name string,
vis_cnt int

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.top_web partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
browser_name,
COUNT(1) AS visitor_count
FROM ods_edu.web_chat_ems_2019_07
GROUP BY browser_name,YEAR(create_time),MONTH(create_time),DAY(create_time)
ORDER BY visitor_count DESC
LIMIT 20;


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
create database app_edu_chat;
use app_edu_chat;
create table if not exists app_edu_chat.total_year
(
date_year varchar(20) comment '年份',
date_day varchar(20) comment '日',
total_pel varchar(20) comment '访问客户量'
);
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table total_year \
--export-dir /user/hive/warehouse/app_edu.db/total_year/dt=2019
create table if not exists app_edu_chat.total_quater
(
date_year varchar(20),
date_quater varchar(20),
date_day varchar(20),
total_pel varchar(20)
)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table total_quater \
--export-dir /user/hive/warehouse/app_edu.db/total_quater/dt=2019

create table if not exists app_edu_chat.total_month
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
total_pel varchar(20)
);
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table total_month \
--export-dir /user/hive/warehouse/app_edu.db/total_month/dt=2019

create table if not exists app_edu_chat.area_day
(
date_year varchar(20),
date_day varchar(20),
province varchar(20),
total_pel varchar(20)
)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table area_day \
--export-dir /user/hive/warehouse/app_edu.db/area_day/dt=2019


create table if not exists app_edu_chat.click_tender
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
province varchar(20),
con_cnt int,
percent_ varchar(20)

) ;
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table click_tender \
--export-dir /user/hive/warehouse/app_edu.db/click_tender/dt=2019


create table if not exists app_edu_chat.vis_tender
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
con_cnt int,
percent_ varchar(20)

)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table vis_tender \
--export-dir /user/hive/warehouse/app_edu.db/vis_tender/dt=2019

create table if not exists app_edu_chat.vis_per_hour
(

date_hour varchar(20),
con_cnt int

);
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table vis_per_hour \
--export-dir /user/hive/warehouse/app_edu.db/vis_per_hour/dt=2019

create table if not exists app_edu_chat.dif_channel
(
chennel_c varchar(20),
visiting int,
percent_ varchar(20)

)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table dif_channel \
--export-dir /user/hive/warehouse/app_edu.db/dif_channel/dt=2019

create table if not exists app_edu_chat.dif_source
(
source_c varchar(20),
visit_count int,
percent_ varchar(20)

)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table dif_source \
--export-dir /user/hive/warehouse/app_edu.db/dif_source/dt=2019


create table if not exists app_edu_chat.top_web
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
brouse_name varchar(20),
vis_cnt int
)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table top_web \
--export-dir /user/hive/warehouse/app_edu.db/top_web/dt=2019

mysql导入hive

1
2
3
4
5
6
7
8
sqoop import \ --connect jdbc:mysql://node1:3306/nev \
--username root \
--password 123456 \
--table web_chat_ems_2019_07 \
--warehouse-dir /user/hive/warehouse
--hive-database ods_edu \
--hive-import \
--hive-table web_chat_ems_2019_07
-------------本文结束感谢您的阅读-------------
老板你好,讨口饭吃