0%

五、一些完成的sql

1
-- 1.数据仓库构建-- 1.1创建ods库create database if not exists ods_didi;-- 1.2创建dw数据库create database if not exists dw_didi;-- 1.3创建app数据库create database if not exists app_didi;use ods_didi;-- 2.在ods层创建表-- 2.1创建订单结构表-- 创建用户订单表结构create table if not exists ods_didi.t_user_order(    orderId         string comment '订单id',    telephone       string comment '打车用户手机',    lng             string comment '用户发起打车的经度',    lat             string comment '用户发起打车的纬度',    province        string comment '所在省份',    city            string comment '所在城市',    es_money        double comment '预估打车费用',    gender          string comment '用户信息 - 性别',    profession      string comment '用户信息 - 行业',    age_range       string comment '年龄段(70后、80后、...)',    tip             double comment '小费',    subscribe       int comment '是否预约(0 - 非预约、1 - 预约)',    sub_time        string comment '预约时间',    is_agent        int comment '是否代叫(0 - 本人、1 - 代叫)',    agent_telephone string comment '预约人手机',    order_time      string comment '订单时间')    partitioned by (dt string comment '时间分区')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';-- ods创建取消订单表create table if not exists ods_didi.t_user_cancel_order(    orderId        string comment '订单ID',    cstm_telephone string comment '客户联系电话',    lng            string comment '取消订单的经度',    lat            string comment '取消订单的纬度',    province       string comment '所在省份',    city           string comment '所在城市',    es_distance    double comment '预估距离',    gender         string comment '性别',    profession     string comment '行业',    age_range      string comment '年龄段',    reason         int comment '取消订单原因(1 - 选择了其他交通方式、2 - 与司机达成一致,取消订单、3 - 投诉司机没来接我、4 - 已不需要用车、5 - 无理由取消订单)',    cancel_time    string comment '取消时间')    partitioned by (dt string comment '时间分区')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';-- ods创建订单表支付表create table if not exists ods_didi.t_user_pay_order(    id                         string comment '支付订单ID',    orderId                    string comment '订单ID',    lng                        string comment '目的地的经度(支付地址)',    lat                        string comment '目的地的纬度(支付地址)',    province                   string comment '省份',    city                       string comment '城市',    total_money                double comment '车费总价',    real_pay_money             double comment '实际支付总额',    passenger_additional_money double comment '乘客额外加价',    base_money                 double comment '车费合计',    has_coupon                 int comment '是否使用优惠券(0 - 不使用、1 - 使用)',    coupon_total               double comment '优惠券合计',    pay_way                    int comment '支付方式(0 - 微信支付、1 - 支付宝支付、3 - QQ钱包支付、4 - 一网通银行卡支付)',    mileage                    double comment '里程(单位公里)',    pay_time                   string comment '支付时间')    partitioned by (dt string comment '时间分区')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';-- ods创建用户评价表create table if not exists ods_didi.t_user_evaluate(    id                  string comment '评价日志唯一ID',    orderId             string comment '订单ID',    passenger_telephone string comment '用户电话',    passenger_province  string comment '用户所在省份',    passenger_city      string comment '用户所在城市',    eva_level           int comment '评价等级(1 - 一颗星、... 5 - 五星)',    eva_time            string comment '评价时间')    partitioned by (dt string comment '时间分区')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';-- 创建数据仓库 导入数据load data local inpath '/export/data/didi/order.csv' into table ods_didi.t_user_order partition (dt = '2020-04-12');load data local inpath '/export/data/didi/cancel_order.csv' into table ods_didi.t_user_cancel_order partition (dt = '2020-04-12');load data local inpath '/export/data/didi/pay.csv' into table ods_didi.t_user_pay_order partition (dt = '2020-04-12');load data local inpath '/export/data/didi/evaluate.csv' into table ods_didi.t_user_evaluate partition (dt = '2020-04-12');-- truncate table dw_didi.t_user_pay_order;-- 3.在dw层进行数据预处理use dw_didi;-- 创建宽表语句create table if not exists dw_didi.t_user_order_wide(    orderId          string comment '订单id',    telephone        string comment '打车用户手机',    lng              string comment '用户发起打车的经度',    lat              string comment '用户发起打车的纬度',    province         string comment '所在省份',    city             string comment '所在城市',    es_money         double comment '预估打车费用',    gender           string comment '用户信息 - 性别',    profession       string comment '用户信息 - 行业',    age_range        string comment '年龄段(70后、80后、...)',    tip              double comment '小费',    subscribe        int comment '是否预约(0 - 非预约、1 - 预约)',    subscribe_name   string comment '是否预约名称',    sub_time         string comment '预约时间',    is_agent         int comment '是否代叫(0 - 本人、1 - 代叫)',    is_agent_name    string comment '是否代缴名称',    agent_telephone  string comment '预约人手机',    order_time       string comment '订单时间',    order_date       string comment '订单时间,yyyy-MM-dd',    order_year       string comment '年',    order_month      string comment '月',    order_day        string comment '日',    order_hour       string comment '小时',    order_time_range string comment '时间段')    partitioned by (dt string comment '2020-04-12')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';-- 预处理sql语句 用户订单处理insert overwrite table dw_didi.t_user_order_wide partition (dt = '2020-04-12')select orderid,       telephone,       lng,       lat,       province,       city,       es_money,       gender,       profession,       age_range,       tip,       subscribe,--        if(nvl(subscribe, 0) = 0, '非预约', '预约')                   as subscribe_name,       case           when subscribe = 0 or (subscribe is null) then '非预约'           when subscribe = 1 then '预约'           end                                                             as subscribe_name,       date_format(sub_time, 'yyyy-MM-dd')                                 as sub_time,       is_agent,       case           when is_agent = 0 or (subscribe is null) then '本人'           when is_agent = 1 then '代叫'           end                                                             as is_agent_name,       agent_telephone,--        substr(order_time, 1, 4)                                      as year,       date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')       as order_time,       date_format(order_time, 'yyyy-MM-dd')                               as order_data,       year(date_format(order_time, 'yyyy-MM-dd'))                         as order_year,       month(date_format(order_time, 'yyyy-MM-dd'))                        as order_month,       day(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss'))  as order_day,       hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) as order_hour,       case           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 1 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 5 then '凌晨'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 5 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 8 then '早上'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 8 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 11 then '上午'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 11 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 13 then '中午'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 13 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 17 then '下午'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 17 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 19 then '晚上'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 19 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 20 then '半夜'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 20 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 24 then '深夜'           when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 0 and                hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 1 then '凌晨'           end--        date_format(order_time, 'yyyy-MM-dd HH:mm:ss'),from ods_didi.t_user_orderwhere length(order_time) >= 8  and dt = '2020-04-12';create table if not exists dw_didi.t_user_cancel_order(    orderId     string,    Profession  string,    age_range   string,    Reason      string,    cancel_time string)    partitioned by (dt string comment '2020-04-12')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';insert overwrite table dw_didi.t_user_cancel_order partition (dt = '2020-04-12')select orderId,       profession,       age_range,       reason,       cancel_timefrom ods_didi.t_user_cancel_orderwhere dt = '2020-04-12';create table if not exists dw_didi.t_user_pay_order(    id             string comment '支付订单ID',    orderId        string comment '订单ID',    real_pay_money double comment '实际支付总额',    has_coupon     int comment '是否使用优惠券(0 - 不使用、1 - 使用)',    pay_way        int comment '支付方式(0-微信支付、1-支付宝支付、3-QQ钱包支付、4- 一网通银行卡支付)',    mileage        double comment '里程(单位公里)',    pay_time       string comment '支付时间')    partitioned by (dt string comment '2020-04-12')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';insert overwrite table dw_didi.t_user_pay_order partition (dt = '2020-04-12')select id,       orderId,       real_pay_money,       has_coupon,       pay_way,       mileage,       pay_timefrom ods_didi.t_user_pay_orderwhere dt = '2020-04-12';create table if not exists dw_didi.t_user_evaluate(    id        string comment '评价日志唯一ID',    orderId   string comment '订单ID',    eva_level int comment '评价等级(1 - 一颗星、... 5 - 五星)',    eva_time  string comment '评价时间')    partitioned by (dt string comment '时间分区')    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';insert overwrite table dw_didi.t_user_evaluate partition (dt = '2020-04-12')select id,       orderId,       eva_level,       eva_timefrom ods_didi.t_user_evaluatewhere dt = '2020-04-12';-- 4.数据处理-- 4.1 总订单笔数分析-- 4.1.1计算4.12的总订单笔数分析select max(dt) as     `时间`,       count(orderid) `订单总笔数`from dw_didi.t_user_order_widewhere dt = '2020-04-12';--建表create table if not exists app_didi.t_order_total(    date_val string comment '日期(yyyy-MM-dd)',    count    int comment '订单笔数') partitioned by (month string comment '按月分区yyyy-MM')    row format delimited fields terminated by ',';--加载数据据insert into table app_didi.t_order_total partition (month = '2020-04')select max(dt)        as `时间`,       count(orderid) as `订单时间`from dw_didi.t_user_order_widewhere dt = '2020-04-12';-- truncate table app_didi.t_order_total;-- 4.2 预约订单/非预约订单占比分析-- sum,avg,max,min-- 预约单/总单*100%select count(*) as cnt_totalfrom dw_didi.t_user_order_widewhere dt = '2020-04-12';select max(order_date)       as `日期`,       subscribe_name        as `是否预约`,       count(subscribe_name) as cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by subscribe_name;--左连接select *from (select max(order_date)       as `日期`,             subscribe_name        as `是否预约`,             count(subscribe_name) as cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12'      group by subscribe_name) t1         left join (select count(*) as cnt_total                    from dw_didi.t_user_order_wide                    where dt = '2020-04-12') t2;-- 隐式内连接select `日期`,       `是否预约`,       concat(round(cnt / cnt_total * 100, 2), '%') as `百分比`from (select max(order_date)       as `日期`,             subscribe_name        as `是否预约`,             count(subscribe_name) as cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12'      group by subscribe_name) t1,     (select count(*) as cnt_total      from dw_didi.t_user_order_wide      where dt = '2020-04-12') t2;--开窗函数select order_date     as                                        `日期`,       subscribe_name as                                        `是否预约`,       count(subscribe_name) over (partition by subscribe_name) cnt,       count() over ()                                          cnt_totalfrom dw_didi.t_user_order_widewhere dt = '2020-04-12';-- group by subscribe_name;--开窗函数2select max(`日期`) as                                         `日期`,       `是否预约`,       concat(round(max(cnt) / max(cnt_total) * 100, 2), '%') `百分比`from (select order_date     as                                        `日期`,             subscribe_name as                                        `是否预约`,             count(subscribe_name) over (partition by subscribe_name) cnt,             count() over ()                                          cnt_total      from dw_didi.t_user_order_wide      where dt = '2020-04-12') tgroup by `是否预约`;--方法三select `日期`,       `是否预约`,       concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`from (select max(order_date)       as `日期`,             subscribe_name        as `是否预约`,             count(subscribe_name) as cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12'      group by subscribe_name) t;create table if not exists app_didi.t_order_subscribe_percent(    date_val       string comment '日期',    subscribe_name string comment '是否预约',    percent_val    string comment '百分比') partitioned by (month string comment '年月yyyy-MM')    row format delimited fields terminated by ',';select `日期`,       `是否预约`,       cnt / sum(cnt) over () * 100from (select max(order_date)       as `日期`,             subscribe_name        as `是否预约`,             count(subscribe_name) as cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12'      group by subscribe_name) t;insert overwrite table app_didi.t_order_subscribe_percent partition (month = '2020-04')select `日期`,       `是否预约`,       concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`from (select max(order_date)       as `日期`,             subscribe_name        as `是否预约`,             count(subscribe_name) as cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12'      group by subscribe_name) t;-- 4.3不同时段订单的个数create table if not exists app_didi.t_order_timerange_total(    datetime  string comment '日期',    timerange string comment '时间段',    count     int comment '订单数量')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';--sqlselect max(dt),       order_time_range,       count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by order_time_range;--加载数据insert overwrite table app_didi.t_order_timerange_total partition (month = '2020-04')select max(dt),       order_time_range,       count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by order_time_range;--4.4不同年龄段、时段订单个数select max(dt),       age_range,       order_time_range,       count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by age_range,         order_time_range;create table if not exists app_didi.t_order_age_and_time_range_total(    datetime         string comment '日期',    age_range        string comment '年龄段',    order_time_range string comment '时段',    count            int comment '订单数量')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';insert overwrite table app_didi.t_order_age_and_time_range_total partition (month = '2020-04')select max(dt),       age_range,       order_time_range,       count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by age_range, order_time_range;--4.4不同地域订单个数select province, count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by province;--建表create table if not exists app_didi.t_order_province_total(    datetime string comment '日期',    province string comment '省份',    count    int comment '订单数量')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';insert overwrite table app_didi.t_order_province_total partition (month = '2020-04')select '2020-04-12',       province,       count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'group by province;-- 4.5求订单客户职业排名top5-- 第一步 :按职业分组求客户数量select max(dt),       profession,       count(orderId)from dw_didi.t_user_order_widewhere dt = '2020-04-12'group by profession;-- 第二部 排名select dt1,       profession,       cnt,       row_number() over (order by cnt desc )from (select max(dt) as     dt1,             profession,             count(orderId) cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12'      group by profession) t;-- 取前五select *from (select dt1,             profession,             cnt,             row_number() over (order by cnt desc ) as rk      from (select max(dt) as     dt1,                   profession,                   count(orderId) cnt            from dw_didi.t_user_order_wide            where dt = '2020-04-12'            group by profession) t1) t2where rk <= 5;with t1 as (select max(dt) dt1,                   profession,                   count(orderId) cnt            from dw_didi.t_user_order_wide            where dt = '2020-04-12'            group by profession),     t2 as (select dt1,profession,cnt,                   row_number() over (order by cnt desc ) as rk            from t1)select *from t2where rk<=5;select *from (select t.profession,             t.cnt,             rank() over (order by t.cnt desc ) as rk      from (select profession,                   count(*) as cnt            from dw_didi.t_user_order_wide            group by profession) t) ttwhere tt.rk <= 5;--建表create table if not exists app_didi.t_order_profession_total_topn(    profession string comment '职业',    Order_cnt  int comment '订单数量',    rk         int comment '排名')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';--加载数据insert overwrite table app_didi.t_order_profession_total_topn partition (month = '2020-04')select *from (select t.profession,             t.cnt,             rank() over (order by t.cnt desc ) as rk      from (select profession, count(*) as cnt from dw_didi.t_user_order_wide group by profession) t) ttwhere tt.rk <= 5;--4.6用户订单取消占比select '2020-04-12'                                                date_val,       concat(round(t1.total_cnt / t2.total_cnt * 100, 2), '%') as cancel_order_percentfrom (select count(orderid) as total_cnt      from ods_didi.t_user_cancel_order      where dt = '2020-04-12') t1        ,     (select count(orderid) as total_cnt      from dw_didi.t_user_order_wide      where dt = '2020-04-12') t2;--创建表create table if not exists app_didi.t_order_cancel_order_percent(    datetime             string comment '日期',    cancel_order_percent string comment '百分比')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';--加载数据insert overwrite table app_didi.t_order_cancel_order_percent partition (month = '2020-04')select '2020-04-12'                                                date_val        ,       concat(round(t1.total_cnt / t2.total_cnt * 100, 2), '%') as percent_valfrom (select count(*) total_cnt from ods_didi.t_user_cancel_order where dt = '2020-04-12') t1        ,     (select count(*) total_cnt from dw_didi.t_user_order_wide where dt = '2020-04-12') t2;-- 4.8统计用户取消订单原因top1with t1 as(select               reason,profession,                   count(reason) over() cnt           from dw_didi.t_user_cancel_order           where dt = '2020-04-12'         ),     t2 as(select reason,profession,  cnt,                  row_number() over(order by cnt desc) as rk           from t1     )select *from t2where rk<=5;insert overwrite table  app_didi.t_order_cancel_reason partition (month = '2020-04')select *from(        select            t.profession,             t.cnt,             rank() over (order by t.cnt desc ) as rk        from (select                  profession,                  count(*) as cnt              from dw_didi.t_user_cancel_order group by profession) t    ) ttwhere tt.rk <= 5;--建表create table if not exists app_didi.t_order_cancel_reason(    profession string comment '职业',    cancel_cnt  int comment '订单数量',    rk         int comment '排名')partitioned by (month string comment '年月,yyyy-MM')row format delimited fields terminated by ',';--统计每个省订单量最高的城市top3select city,    count(city)from dw_didi.t_user_order_widewhere length(city) > 0group by  city;--建表insert overwrite table  app_didi.t_order_city partition (month = '2020-04')select *from(        select            t.city,            t.cnt,            rank() over (order by t.cnt desc ) as rk        from (select city,                     count(city) as cnt              from dw_didi.t_user_order_wide              where length(city) > 0              group by  city) t    ) ttwhere tt.rk <= 3;create table if not exists app_didi.t_order_city(    city string comment '城市',    order_cnt  int comment '订单数量',    rk         int comment '排名')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';--统计订单支付中使用优惠券的百分比create table if not exists app_didi.t_order_dicount(    isdicount string comment '是否使用优惠券',    order_cnt  string comment '百分比')partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';insert overwrite table  app_didi.t_order_dicount partition (month = '2020-04')select       `是否使用优惠券`,       concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`from (select             has_coupon         as `是否使用优惠券`,             count( has_coupon) as cnt      from dw_didi.t_user_pay_order      where has_coupon!=17      group by  has_coupon) t;--统计用户五星级好评的百分比create table if not exists app_didi.t_order_five_start(    fivestart string comment '是否是5',    order_cnt  string comment '百分比')    partitioned by (month string comment '年月,yyyy-MM')    row format delimited fields terminated by ',';insert overwrite table  app_didi.t_order_five_start partition (month = '2020-04')select    `是否是5`,    concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`from (select          eva_level         as `是否是5`,          count(eva_level ) as cnt      from dw_didi.t_user_evaluate      where length(eva_level)>0      group by  eva_level ) t;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
create table if not exists app_didi.t_order_subscribe_percent
(
date_val date,
subscribe_name varchar(20),
percent_val varchar(20)
);

#创建不同时段订单统计目标表
create table if not exists app_didi.t_order_timerange_total
(
order_date date,
timerange varchar(20),
count int
);

#创建不同地域订单统计目标表
create table if not exists app_didi.t_order_province_total
(
order_date date,
province varchar(20),
count int
);
--
create table if not exists app_didi.t_order_profession_total_topn(
profession varchar(20),
order_cnt int,
rk int
);
/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_profession_total_topn \
--export-dir /user/hive/warehouse/app_didi.db/t_order_profession_total_topn/month=2020-04;
--
create table if not exists app_didi.t_order_cancel_order_percent
(
datetime string comment '日期',
cancel_order_percent string comment '百分比'
)
create table if not exists app_didi.t_order_cancel_order_percent(
datatime varchar(20),
cancel_order_percent varchar(20)
);

create table if not exists app_didi.t_detail
(
datetime varchar(20) comment '日期',
province varchar(20) comment '省份',
count int comment '订单数量',
lng varchar(50) comment '经度' ,
lat varchar(50) comment '纬度'
);
/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_detail \
--export-dir /user/hive/warehouse/app_didi.db/t_detail/month=2020-04;

#创建不同年龄段,不同时段订单目标表
create table if not exists app_didi.t_order_age_and_time_range_total
(
order_date date,
age_range varchar(20),
order_time_range varchar(20),
count int
);


/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_subscribe_percent \
--export-dir /user/hive/warehouse/app_didi.db/t_order_subscribe_percent/month=2020-04;


/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_timerange_total \
--export-dir /user/hive/warehouse/app_didi.db/t_order_timerange_total/month=2020-04

/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_province_total \
--export-dir /user/hive/warehouse/app_didi.db/t_order_province_total/month=2020-04

/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_age_and_time_range_total \
--export-dir /user/hive/warehouse/app_didi.db/t_order_age_and_time_range_total/month=2020-04
create table if not exists app_didi.t_order_cancel_reason
(
profession varchar(20) comment '职业',
cancel_cnt int comment '订单数量',
rk int comment '排名'
);
/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_cancel_reason \
--export-dir /user/hive/warehouse/app_didi.db/t_order_cancel_reason/month=2020-04

create table if not exists app_didi.t_order_city
(
city varchar(20) comment '城市',
order_cnt int comment '订单数量',
rk int comment '排名'
);
/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_city \
--export-dir /user/hive/warehouse/app_didi.db/t_order_city/month=2020-04

create table if not exists app_didi.t_order_dicount
(
isdicount varchar(20) comment '是否使用优惠券',
order_cnt varchar(20)comment '百分比'
);
/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_dicount \
--export-dir /user/hive/warehouse/app_didi.db/t_order_dicount/month=2020-04

create table if not exists app_didi.t_order_five_start
(
fivestart varchar(20) comment '是否是5',
order_cnt varchar(20) comment '百分比'
);
/export/server/sqoop-1.4.6/bin/sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_didi \
--username root \
--password 123456 \
--table t_order_five_start \
--export-dir /user/hive/warehouse/app_didi.db/t_order_five_start/month=2020-04

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
-- 1.数据仓库构建
-- 1.1创建ods库
create database if not exists ods_didi;
-- 1.2创建dw数据库
create database if not exists dw_didi;
-- 1.3创建app数据库
create database if not exists app_didi;
use ods_didi;
-- 2.在ods层创建表
-- 2.1创建订单结构表
-- 创建用户订单表结构
create table if not exists ods_didi.t_user_order
(
orderId string comment '订单id',
telephone string comment '打车用户手机',
lng string comment '用户发起打车的经度',
lat string comment '用户发起打车的纬度',
province string comment '所在省份',
city string comment '所在城市',
es_money double comment '预估打车费用',
gender string comment '用户信息 - 性别',
profession string comment '用户信息 - 行业',
age_range string comment '年龄段(70后、80后、...)',
tip double comment '小费',
subscribe int comment '是否预约(0 - 非预约、1 - 预约)',
sub_time string comment '预约时间',
is_agent int comment '是否代叫(0 - 本人、1 - 代叫)',
agent_telephone string comment '预约人手机',
order_time string comment '订单时间'
) partitioned by (dt string comment '时间分区')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- ods创建取消订单表
create table if not exists ods_didi.t_user_cancel_order
(
orderId string comment '订单ID',
cstm_telephone string comment '客户联系电话',
lng string comment '取消订单的经度',
lat string comment '取消订单的纬度',
province string comment '所在省份',
city string comment '所在城市',
es_distance double comment '预估距离',
gender string comment '性别',
profession string comment '行业',
age_range string comment '年龄段',
reason int comment '取消订单原因(1 - 选择了其他交通方式、2 - 与司机达成一致,取消订单、3 - 投诉司机没来接我、4 - 已不需要用车、5 - 无理由取消订单)',
cancel_time string comment '取消时间'
) partitioned by (dt string comment '时间分区')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- ods创建订单表支付表
create table if not exists ods_didi.t_user_pay_order
(
id string comment '支付订单ID',
orderId string comment '订单ID',
lng string comment '目的地的经度(支付地址)',
lat string comment '目的地的纬度(支付地址)',
province string comment '省份',
city string comment '城市',
total_money double comment '车费总价',
real_pay_money double comment '实际支付总额',
passenger_additional_money double comment '乘客额外加价',
base_money double comment '车费合计',
has_coupon int comment '是否使用优惠券(0 - 不使用、1 - 使用)',
coupon_total double comment '优惠券合计',
pay_way int comment '支付方式(0 - 微信支付、1 - 支付宝支付、3 - QQ钱包支付、4 - 一网通银行卡支付)',
mileage double comment '里程(单位公里)',
pay_time string comment '支付时间'
) partitioned by (dt string comment '时间分区')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- ods创建用户评价表
create table if not exists ods_didi.t_user_evaluate
(
id string comment '评价日志唯一ID',
orderId string comment '订单ID',
passenger_telephone string comment '用户电话',
passenger_province string comment '用户所在省份',
passenger_city string comment '用户所在城市',
eva_level int comment '评价等级(1 - 一颗星、... 5 - 五星)',
eva_time string comment '评价时间'
) partitioned by (dt string comment '时间分区')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- 创建数据仓库 导入数据
load data local inpath '/export/data/didi/order.csv' into table ods_didi.t_user_order partition (dt = '2020-04-12');
load data local inpath '/export/data/didi/cancel_order.csv' into table ods_didi.t_user_cancel_order partition (dt = '2020-04-12');
load data local inpath '/export/data/didi/pay.csv' into table ods_didi.t_user_pay_order partition (dt = '2020-04-12');
load data local inpath '/export/data/didi/evaluate.csv' into table ods_didi.t_user_evaluate partition (dt = '2020-04-12');
truncate table dw_didi.t_user_pay_order;
-- 3.在dw层进行数据预处理use dw_didi;
-- 创建宽表语句
create table if not exists dw_didi.t_user_order_wide
(
orderId string comment '订单id',
telephone string comment '打车用户手机',
lng string comment '用户发起打车的经度',
lat string comment '用户发起打车的纬度',
province string comment '所在省份',
city string comment '所在城市',
es_money double comment '预估打车费用',
gender string comment '用户信息 - 性别',
profession string comment '用户信息 - 行业',
age_range string comment '年龄段(70后、80后、...)',
tip double comment '小费',
subscribe int comment '是否预约(0 - 非预约、1 - 预约)',
subscribe_name string comment '是否预约名称',
sub_time string comment '预约时间',
is_agent int comment '是否代叫(0 - 本人、1 - 代叫)',
is_agent_name string comment '是否代缴名称',
agent_telephone string comment '预约人手机',
order_time string comment '订单时间',
order_date string comment '订单时间,yyyy-MM-dd',
order_year string comment '年',
order_month string comment '月',
order_day string comment '日',
order_hour string comment '小时',
order_time_range string comment '时间段'
) partitioned by (dt string comment '2020-04-12')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
-- 预处理sql语句 用户订单处理
insert overwrite table dw_didi.t_user_order_wide partition (dt = '2020-04-12')
select orderid,
telephone,
lng,
lat,
province,
city,
es_money,
gender,
profession,
age_range,
tip,
subscribe,
if(nvl(subscribe, 0) = 0, '非预约', '预约') as subscribe_name,
case
when subscribe = 0 or (subscribe is null) then '非预约'
when subscribe = 1
then '预约' end as subscribe_name,
date_format(sub_time, 'yyyy-MM-dd') as sub_time,
is_agent,
case when is_agent = 0 or (subscribe is null) then '本人' when is_agent = 1 then '代叫' end as is_agent_name,
agent_telephone,
substr(order_time, 1, 4) as year,
date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss') as order_time,
date_format(order_time, 'yyyy-MM-dd') as order_data,
year(date_format(order_time, 'yyyy-MM-dd')) as order_year,
month(date_format(order_time, 'yyyy-MM-dd')) as order_month,
day(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) as order_day,
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) as order_hour,
case
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 1 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 5 then '凌晨'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 5 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 8 then '早上'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 8 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 11 then '上午'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 11 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 13 then '中午'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 13 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 17 then '下午'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 17 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 19 then '晚上'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 19 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 20 then '半夜'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 20 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 24 then '深夜'
when hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) >= 0 and
hour(date_format(concat(order_time, ':00'), 'yyyy-MM-dd HH:mm:ss')) < 1 then '凌晨' end,
date_format(order_time, 'yyyy-MM-dd HH:mm:ss')
from ods_didi.t_user_orderwhere length(order_time) >= 8 and dt = '2020-04-12';
create table if not exists dw_didi.t_user_cancel_order
(
orderId string,
Profession string,
age_range string,
Reason string,
cancel_time string
) partitioned by (dt string comment '2020-04-12')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
insert overwrite table dw_didi.t_user_cancel_order partition (dt = '2020-04-12')
select orderId, profession, age_range, reason, cancel_timefrom ods_didi.t_user_cancel_orderwhere dt = '2020-04-12';
create table if not exists dw_didi.t_user_pay_order
(
id string comment '支付订单ID',
orderId string comment '订单ID',
real_pay_money double comment '实际支付总额',
has_coupon int comment '是否使用优惠券(0 - 不使用、1 - 使用)',
pay_way int comment '支付方式(0-微信支付、1-支付宝支付、3-QQ钱包支付、4- 一网通银行卡支付)',
mileage double comment '里程(单位公里)',
pay_time string comment '支付时间'
) partitioned by (dt string comment '2020-04-12') ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
insert overwrite table dw_didi.t_user_pay_order partition (dt = '2020-04-12')
select id,
orderId,
real_pay_money,
has_coupon,
pay_way,
mileage,
pay_timefrom ods_didi.t_user_pay_orderwhere dt = '2020-04-12';
create table if not exists dw_didi.t_user_evaluate
(
id string comment '评价日志唯一ID',
orderId string comment '订单ID',
eva_level int comment '评价等级(1 - 一颗星、... 5 - 五星)',
eva_time string comment '评价时间'
) partitioned by (dt string comment '时间分区') ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
insert overwrite table dw_didi.t_user_evaluate partition (dt = '2020-04-12')
select id, orderId, eva_level, eva_timefrom ods_didi.t_user_evaluatewhere dt = '2020-04-12';
-- 4.数据处理
-- 4.1 总订单笔数分析
-- 4.1.1计算4.12的总订单笔数分析
select max(dt) as `时间`, count(orderid) `订单总笔数`
from dw_didi.t_user_order_widewhere dt = '2020-04-12';
--建表
create table if not exists app_didi.t_order_total
(
date_val string comment '日期(yyyy-MM-dd)',
count int comment '订单笔数'
) partitioned by (month string comment '按月分区yyyy-MM')
row format delimited fields terminated by ',';
--加载数据据
insert into table app_didi.t_order_total partition (month = '2020-04')
select max(dt) as `时间`, count(orderid) as `订单时间`
from dw_didi.t_user_order_widewhere dt = '2020-04-12';
truncate table app_didi.t_order_total;
-- 4.2 预约订单/非预约订单占比分析-- sum,avg,max,min-- 预约单/总单*100%
select count(*) as cnt_totalfrom dw_didi.t_user_order_widewhere dt = '2020-04-12';
select max(order_date) as `日期`,
subscribe_name as `是否预约`,
count(subscribe_name) as cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by subscribe_name;
--左连接
select *
from (select max(order_date) as `日期`, subscribe_name as `是否预约`, count(subscribe_name) as cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by subscribe_name) t1
left join (select count(*) as cnt_total from dw_didi.t_user_order_wide where dt = '2020-04-12') t2;
-- 隐式内连接
select `日期`, `是否预约`, concat(round(cnt / cnt_total * 100, 2), '%') as `百分比`
from (select max(order_date) as `日期`, subscribe_name as `是否预约`, count(subscribe_name) as cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by subscribe_name) t1,
(select count(*) as cnt_total from dw_didi.t_user_order_wide where dt = '2020-04-12') t2;
--开窗函数
select order_date as `日期`,
subscribe_name as `是否预约`,
count(subscribe_name) over (partition by subscribe_name) cnt,
count() over () cnt_totalfrom dw_didi.t_user_order_widewhere dt = '2020-04-12';
-- group by subscribe_name;
--开窗函数2
select max(`日期`) as `日期`, `是否预约`, concat(round(max(cnt) / max(cnt_total) * 100, 2), '%') `百分比`
from (select order_date as `日期`,
subscribe_name as `是否预约`,
count(subscribe_name) over (partition by subscribe_name) cnt,
count() over () cnt_total
from dw_didi.t_user_order_wide
where dt = '2020-04-12') tgroup by `是否预约`;
--方法三
select `日期`, `是否预约`, concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`
from (select max(order_date) as `日期`, subscribe_name as `是否预约`, count(subscribe_name) as cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by subscribe_name) t;
create table if not exists app_didi.t_order_subscribe_percent
(
date_val string comment '日期',
subscribe_name string comment '是否预约',
percent_val string comment '百分比'
) partitioned by (month string comment '年月yyyy-MM') row format delimited fields terminated by ',';
select `日期`,
`是否预约`,
cnt / sum(cnt) over () *
100from(select max(order_date) as `日期`, subscribe_name as `是否预约`, count(subscribe_name) as cnt from dw_didi.t_user_order_wide where dt = '2020-04-12' group by subscribe_name) t;
insert overwrite table app_didi.t_order_subscribe_percent partition (month = '2020-04')
select `日期`, `是否预约`, concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`
from (select max(order_date) as `日期`, subscribe_name as `是否预约`, count(subscribe_name) as cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by subscribe_name) t;
-- 4.3不同时段订单的个数
create table if not exists app_didi.t_order_timerange_total
(
datetime string comment '日期',
timerange string comment '时间段',
count int comment '订单数量'
) partitioned by (month string comment '年月,yyyy-MM')
row format delimited fields terminated by ',';
--sql
select max(dt), order_time_range, count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by order_time_range;
--加载数据
insert overwrite table app_didi.t_order_timerange_total partition (month = '2020-04')
select max(dt), order_time_range, count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by order_time_range;
--4.4不同年龄段、时段订单个数
select max(dt),
age_range,
order_time_range,
count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by age_range, order_time_range;
create table if not exists app_didi.t_order_age_and_time_range_total
(
datetime string comment '日期',
age_range string comment '年龄段',
order_time_range string comment '时段',
count int comment '订单数量'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
insert overwrite table app_didi.t_order_age_and_time_range_total partition (month = '2020-04')
select max(dt),
age_range,
order_time_range,
count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by age_range, order_time_range;
--4.4不同地域订单个数
select province, count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by province;
--建表
create table if not exists app_didi.t_order_province_total
(
datetime string comment '日期',
province string comment '省份',
count int comment '订单数量'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
insert overwrite table app_didi.t_order_province_total partition (month = '2020-04')
select '2020-04-12', province, count(*) as order_cntfrom dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by province;
-- 4.5求订单客户职业排名top5
-- 第一步 :按职业分组求客户数量
select max(dt), profession, count(orderId)
from dw_didi.t_user_order_widewhere dt = '2020-04-12'
group by profession;
-- 第二部 排名
select dt1, profession, cnt, row_number() over (order by cnt desc )
from (select max(dt) as dt1, profession, count(orderId) cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by profession) t;
-- 取前五
select *
from (select dt1, profession, cnt, row_number() over (order by cnt desc ) as rk
from (select max(dt) as dt1, profession, count(orderId) cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by profession) t1) t2where rk <= 5;
with t1 as (select max(dt) dt1, profession, count(orderId) cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12'
group by profession),
t2 as (select dt1, profession, cnt, row_number() over (order by cnt desc ) as rk from t1)
select *
from t2where rk<=5;
select *
from (select t.profession, t.cnt, rank() over (order by t.cnt desc ) as rk
from (select profession, count(*) as cnt
from dw_didi.t_user_order_wide
group by profession) t) ttwhere tt.rk <= 5;
--建表
create table if not exists app_didi.t_order_profession_total_topn
(
profession string comment '职业',
Order_cnt int comment '订单数量',
rk int comment '排名'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
--加载数据
insert overwrite table app_didi.t_order_profession_total_topn partition (month = '2020-04')
select *
from (select t.profession, t.cnt, rank() over (order by t.cnt desc ) as rk
from (select profession, count(*) as cnt
from dw_didi.t_user_order_wide
group by profession) t) ttwhere tt.rk <= 5;
--4.6用户订单取消占比
select '2020-04-12' date_val,
concat(round(t1.total_cnt / t2.total_cnt * 100, 2), '%') as cancel_order_percentfrom (select count(orderid) as total_cnt from ods_didi.t_user_cancel_order where dt = '2020-04-12') t1 , (select count(orderid) as total_cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12') t2;
--创建表
create table if not exists app_didi.t_order_cancel_order_percent
(
datetime string comment '日期',
cancel_order_percent string comment '百分比'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
--加载数据
insert overwrite table app_didi.t_order_cancel_order_percent partition (month = '2020-04')
select '2020-04-12' date_val,
concat(round(t1.total_cnt / t2.total_cnt * 100, 2), '%') as percent_valfrom (select count(*) total_cnt from ods_didi.t_user_cancel_order where dt = '2020-04-12') t1 , (select count(*) total_cnt
from dw_didi.t_user_order_wide
where dt = '2020-04-12') t2;
-- 4.8统计用户取消订单原因top1
with t1 as (select reason, profession, count(reason) over () cnt
from dw_didi.t_user_cancel_order
where dt = '2020-04-12'),
t2 as (select reason, profession, cnt, row_number() over (order by cnt desc) as rk from t1)
select *
from t2where rk<=5;
insert overwrite table app_didi.t_order_cancel_reason partition (month = '2020-04')
select *
from (select t.profession, t.cnt, rank() over (order by t.cnt desc ) as rk
from (select profession, count(*) as cnt
from dw_didi.t_user_cancel_order
group by profession) t) ttwhere tt.rk <= 5;
--建表
create table if not exists app_didi.t_order_cancel_reason
(
profession string comment '职业',
cancel_cnt int comment '订单数量',
rk int comment '排名'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
--统计每个省订单量最高的城市top3
select city, count(city)
from dw_didi.t_user_order_widewhere length(city) > 0group by city;
--建表
insert overwrite table app_didi.t_order_city partition (month = '2020-04')
select *
from (select t.city, t.cnt, rank() over (order by t.cnt desc ) as rk
from (select city, count(city) as cnt
from dw_didi.t_user_order_wide
where length(city) > 0
group by city) t) ttwhere tt.rk <= 3;
create table if not exists app_didi.t_order_city
(
city string comment '城市',
order_cnt int comment '订单数量',
rk int comment '排名'
) partitioned by (month string comment '年月,yyyy-MM')
row format delimited fields terminated by ',';
--统计订单支付中使用优惠券的百分比
create table if not exists app_didi.t_order_dicount
(
isdicount string comment '是否使用优惠券',
order_cnt string comment '百分比'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
insert overwrite table app_didi.t_order_dicount partition (month = '2020-04')
select `是否使用优惠券`, concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`
from (select has_coupon as `是否使用优惠券`, count(has_coupon) as cnt
from dw_didi.t_user_pay_order
where has_coupon != 17
group by has_coupon) t;
--统计用户五星级好评的百分比
create table if not exists app_didi.t_order_five_start
(
fivestart string comment '是否是5',
order_cnt string comment '百分比'
) partitioned by (month string comment '年月,yyyy-MM') row format delimited fields terminated by ',';
insert overwrite table app_didi.t_order_five_start partition (month = '2020-04')
select `是否是5`, concat(round((cnt / sum(cnt) over ()) * 100, 2), '%') `百分比`
from (select eva_level as `是否是5`, count(eva_level) as cnt
from dw_didi.t_user_evaluate
where length(eva_level) > 0
group by eva_level) t;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
create database app_edu_chat;
use app_edu_chat;
create table if not exists app_edu_chat.total_year
(
date_year varchar(20) comment '年份',
date_day varchar(20) comment '日',
total_pel varchar(20) comment '访问客户量'
);
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table total_year \
--export-dir /user/hive/warehouse/app_edu.db/total_year/dt=2019
create table if not exists app_edu_chat.total_quater
(
date_year varchar(20),
date_quater varchar(20),
date_day varchar(20),
total_pel varchar(20)
)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table total_quater \
--export-dir /user/hive/warehouse/app_edu.db/total_quater/dt=2019

create table if not exists app_edu_chat.total_month
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
total_pel varchar(20)
);
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table total_month \
--export-dir /user/hive/warehouse/app_edu.db/total_month/dt=2019

create table if not exists app_edu_chat.area_day
(
date_year varchar(20),
date_day varchar(20),
province varchar(20),
total_pel varchar(20)
)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table area_day \
--export-dir /user/hive/warehouse/app_edu.db/area_day/dt=2019


create table if not exists app_edu_chat.click_tender
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
province varchar(20),
con_cnt int,
percent_ varchar(20)

) ;
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table click_tender \
--export-dir /user/hive/warehouse/app_edu.db/click_tender/dt=2019


create table if not exists app_edu_chat.vis_tender
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
con_cnt int,
percent_ varchar(20)

)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table vis_tender \
--export-dir /user/hive/warehouse/app_edu.db/vis_tender/dt=2019

create table if not exists app_edu_chat.vis_per_hour
(

date_hour varchar(20),
con_cnt int

);
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table vis_per_hour \
--export-dir /user/hive/warehouse/app_edu.db/vis_per_hour/dt=2019

create table if not exists app_edu_chat.dif_channel
(
chennel_c varchar(20),
visiting int,
percent_ varchar(20)

)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table dif_channel \
--export-dir /user/hive/warehouse/app_edu.db/dif_channel/dt=2019

create table if not exists app_edu_chat.dif_source
(
source_c varchar(20),
visit_count int,
percent_ varchar(20)

)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table dif_source \
--export-dir /user/hive/warehouse/app_edu.db/dif_source/dt=2019


create table if not exists app_edu_chat.top_web
(
date_year varchar(20),
date_month varchar(20),
date_day varchar(20),
brouse_name varchar(20),
vis_cnt int
)
sqoop export \
--connect jdbc:mysql://192.168.52.161:3306/app_edu_chat \
--username root \
--password 123456 \
--table top_web \
--export-dir /user/hive/warehouse/app_edu.db/top_web/dt=2019


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
create database ods_edu;
use ods_edu;
-- create table if not exists

create database dw_edu;
create database app_edu;


create table if not exists dw_edu.t_edu_chat
(
id int comment '主键',
create_date_time timestamp comment '数据创建时间',
sid string comment '访客id',
create_time timestamp comment '会话创建时间',
seo_source string comment '搜索来源',
seo_keywords string comment '关键字',
ip string comment 'IP地址',
area string comment '地域',
country string comment '所在国家',
province string comment '省',
city string comment '城市',
origin_channel string comment '投放渠道',
`user` string comment '所属坐席',

manual_time timestamp comment '人工开始时间',

begin_time timestamp comment '坐席领取时间 ',

end_time timestamp comment '会话结束时间',

last_customer_msg_time_stamp timestamp comment '客户最后一条消息的时间',

last_agent_msg_time_stamp timestamp comment '坐席最后一下回复的时间',


reply_msg_count int comment '客服回复消息数',
msg_count int comment '客户发送消息数',
browser_name string comment '浏览器名称',
os_info string comment '系统名称'
) partitioned by (dt string comment '2019')
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';


insert overwrite table dw_edu.t_edu_chat partition (dt = '2019')
select id,
create_date_time,
sid,
create_time,

seo_source comment,
seo_keywords,
ip,
area,
country,
province,
city,
origin_channel,
`user`,
manual_time,

begin_time,

end_time,

last_customer_msg_time_stamp,

last_agent_msg_time_stamp,

reply_msg_count,
msg_count,
browser_name,
os_info
from web_chat_ems_2019_07;


--1.总访问客户量
-- 1.1年日
create table if not exists app_edu.total_year
(
date_year string comment '年份',
date_day string comment '日',
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.total_year partition (dt ='2019')
SELECT
YEAR(create_time) AS `年份`,
DAY(create_time) AS `日`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) BETWEEN '2019' AND '2020'
GROUP BY YEAR(create_time), DAY(create_time)
ORDER BY YEAR(create_time), DAY(create_time);
--1.2季度日
create table if not exists app_edu.total_quater
(
date_year string comment '年份',
date_quater string comment '季度',
date_day string comment '日',
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.total_quater partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
QUARTER(create_time) AS `季度`,
DAY(create_time) AS `日期`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
AND QUARTER(create_time) BETWEEN 1 AND 4
GROUP BY YEAR(create_time), QUARTER(create_time), DAY(create_time)
ORDER BY YEAR(create_time), QUARTER(create_time), DAY(create_time);


--1.3月日
create table if not exists app_edu.total_month
(
date_year string comment '年份',
date_month string comment '月份',
date_day string comment '日',
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.total_month partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time)
ORDER BY YEAR(create_time), MONTH(create_time), DAY(create_time);
-- 2 地区独立访客热力图
-- 2.1.1 地区日
create table if not exists app_edu.area_day
(
date_year string comment '年份',
date_day string comment '日',
province string ,
total_pel string comment '访问客户量'
) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';


insert overwrite table app_edu.area_day partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
DAY(create_time) AS `日`,
province AS `省份`,
COUNT(DISTINCT sid) AS `访问量`
FROM dw_edu.t_edu_chat
WHERE create_time is not null
and YEAR(create_time) BETWEEN '2019' AND '2020'
GROUP BY YEAR(create_time), DAY(create_time), province
ORDER BY YEAR(create_time), DAY(create_time), province;
-- 2.2 地区月

--2.2季度日
SELECT YEAR(create_time) AS `年份`,
QUARTER(create_time) AS `季度`,
DAY(create_time) AS `日期`,
province AS `省份`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
AND QUARTER(create_time) BETWEEN 1 AND 4
GROUP BY YEAR(create_time), QUARTER(create_time), DAY(create_time), province
ORDER BY YEAR(create_time), QUARTER(create_time), DAY(create_time), province;

--2.3 month
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
province,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
WHERE YEAR(create_time) = '2019'
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time), province
ORDER BY YEAR(create_time), MONTH(create_time), DAY(create_time), province;


-- SELECT province, COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) AS `咨询人数`
-- FROM dw_edu.t_edu_chat
-- GROUP BY province;


-- 3. 访客咨询率趋势
create table if not exists app_edu.click_tender
(
date_year string comment '年份',
date_month string ,
date_day string,
province string ,
con_cnt int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';

insert overwrite table app_edu.click_tender partition (dt ='2019')
SELECT
YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
province, COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) AS `咨询人数`,
concat(round(COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) / total.total_consultations * 100, 2), '%') as `百分比`
FROM dw_edu.t_edu_chat
CROSS JOIN (SELECT COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) AS total_consultations FROM dw_edu.t_edu_chat) AS total
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time), province, total.total_consultations;

--4. 客户访问量和访客咨询率双轴趋势
-- concat(round( COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) / COUNT(DISTINCT sid) * 100, 2), '%') as `百分比`
create table if not exists app_edu.vis_tender
(
date_year string comment '年份',
date_month string ,
date_day string,
con_cnt int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.vis_tender partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
COUNT(DISTINCT sid) AS `客户访问量`,
concat(round( COUNT(DISTINCT CASE WHEN msg_count > 0 THEN sid END) / COUNT(DISTINCT sid) * 100, 2), '%') AS `咨询率`
FROM dw_edu.t_edu_chat
-- WHERE create_time BETWEEN '2019-01-01' AND '2020' -- 替换为实际的日期范围
GROUP BY YEAR(create_time), MONTH(create_time), DAY(create_time);
-- 5. 时间段访问客户量趋势
-- 指定时间段的起始日期和结束日期
-- DECLARE @start_date DATE = '开始日期';
-- DECLARE @end_date DATE = '结束日期';

-- 查询指定时间段内每个小时的访问客户量
create table if not exists app_edu.vis_per_hour
(

date_hour string,
con_cnt int

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.vis_per_hour partition (dt ='2019')
SELECT HOUR(create_time) AS `小时`,
COUNT(DISTINCT sid) AS `访问客户量`
FROM dw_edu.t_edu_chat
-- WHERE create_date_time >= @start_date AND create_date_time <= @end_date
GROUP BY HOUR(create_time)
ORDER BY HOUR(create_time);
-- 6.
-- 查询指定时间段内不同来源渠道和小时的访问客户量
create table if not exists app_edu.dif_channel
(
chennel_c string,
visiting int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.dif_channel partition (dt ='2019')
SELECT origin_channel AS `来源渠道`,
COUNT(DISTINCT sid) AS `访问客户量`,
concat(round( COUNT(DISTINCT sid) * 100.0 / (
SELECT COUNT(DISTINCT sid)
FROM dw_edu.t_edu_chat

) , 2), '%') AS `占比`
FROM dw_edu.t_edu_chat
GROUP BY origin_channel
ORDER BY origin_channel;
-- 7.
-- 指定时间段的起始日期和结束日期
create table if not exists app_edu.dif_source
(
source_c string,
visit_count int,
percent_ string

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.dif_source partition (dt ='2019')
SELECT
seo_source,
COUNT(DISTINCT sid) AS visitor_count,
concat(round( COUNT(DISTINCT sid) * 100.0 / (
SELECT COUNT(DISTINCT sid)
FROM dw_edu.t_edu_chat

) , 2), '%') AS visitor_ratio
FROM dw_edu.t_edu_chat
GROUP BY seo_source;



-- 8.
-- 查询访问客户量最多的页面排行榜
create table if not exists app_edu.top_web
(
date_year string comment '年份',
date_month string ,
date_day string,
brouse_name string,
vis_cnt int

) partitioned by (dt string comment '2019') row format delimited fields terminated by ',';
insert overwrite table app_edu.top_web partition (dt ='2019')
SELECT YEAR(create_time) AS `年份`,
MONTH(create_time) AS `月份`,
DAY(create_time) AS `日期`,
browser_name,
COUNT(1) AS visitor_count
FROM ods_edu.web_chat_ems_2019_07
GROUP BY browser_name,YEAR(create_time),MONTH(create_time),DAY(create_time)
ORDER BY visitor_count DESC
LIMIT 20;


-------------本文结束感谢您的阅读-------------
老板你好,讨口饭吃