三、基本sql

一、数据库创建与删除

1.强制删除数据库

1	drop database itcast cascade;

2.创建数据库

1	create database if not exists myhive;

3. 切换使用数据库

1	use myhive;

4. 查看数据库详细信息

1	desc database myhive;

5.删除数据库

1	drop database myhive;

6.查询当前数据库

1	select current_database();

二、数据库表

1.删除表

1	drop table biao ;

2.创建表

-- Table of archer hero stats; rows are stored as tab-delimited text.
create table t_archer (
    id           int    comment "ID",
    name         string comment "英雄名称",
    hp_max       int    comment "最大生命",
    mp_max       int    comment "最大法力",
    attack_max   int    comment "最高物攻",
    defense_max  int    comment "最大物防",
    attack_range string comment "攻击范围",
    role_main    string comment "主要定位",
    role_assist  string comment "次要定位"
)
comment "王者荣耀射手信息"
row format delimited
    fields terminated by "\t";

3.Hive建表时候的字段类型

分类	类型	描述	字面量示例
原始类型	BOOLEAN	true/false	TRUE
	TINYINT	1字节的有符号整数 -128~127	1Y
	SMALLINT	2个字节的有符号整数，-32768~32767	1S
	INT	4个字节的带符号整数	1
	BIGINT	8字节带符号整数	1L
	FLOAT	4字节单精度浮点数	1.0
	DOUBLE	8字节双精度浮点数	1.0
	DECIMAL	任意精度的带符号小数	1.0
	STRING	字符串，变长	“a”,’b’
	VARCHAR	变长字符串	“a”,’b’
	CHAR	固定长度字符串	“a”,’b’
	BINARY	字节数组	无法表示
	TIMESTAMP	时间戳，毫秒值精度	122327493795
	DATE	日期	‘2016-03-29’
	INTERVAL	时间频率间隔
复杂类型	ARRAY	有序的同类型的集合	array(1,2)
	MAP	key-value,key必须为原始类型，value可以任意类型	map(‘a’,1,’b’,2)
	STRUCT	字段集合,类型可以不同	struct(‘1’,1,1.0), named_struct(‘col1’,’1’,’col2’,1,’col3’,1.0)
	UNION	在有限取值范围内的一个值	create_union(1,’a’,63)

4.创建外部表

1
2
3

-- External table with a single string column.
create external table student_exter (
    sid string
);

5.根据查询结果创建表

1
2
3

-- Read every column of student, then the same rows with the columns
-- listed explicitly.
select * from student;

select
    sid,
    sname,
    sbirth,
    ssex
from student;

6.去重结果

-- Distinct values of a single column.
select distinct ssex from student;

-- With several columns, DISTINCT applies to the whole
-- (ssex, sname, sbirth) combination.
select distinct
    ssex,
    sname,
    sbirth
from student;

7.根据已经存在的表结构创建表

1
2
3

-- Create stu4 with the same structure as stu2, then read it back.
create table stu4
    like stu2;

select *
from stu4;
注意: 只拷贝指定表的结构, 不拷贝表的数据

8.查询表的类型

1	desc formatted stu2;

显示表的简要信息

1	desc stu2;

9.数据装载命令Load

1	load data [local] inpath '/export/data/hive_data/student.txt' [overwrite] into table student [partition (partcol1=val1,…)];

1、load data:表示加载数据

2、local: 表示从本地加载数据到hive表；否则从HDFS加载数据到hive表

3、inpath:表示加载数据的路径

4、overwrite:表示覆盖表中已有数据，否则表示追加

5、into table:表示加载到哪张表

6、student:表示具体的表

7、partition:表示上传到指定分区

1	load data local inpath '/export/data/didi/order.csv' into table ods_didi.t_user_order partition (dt = '2020-04-12');

-- Every ssex value, one per row.
select ssex from student;

-- Same query with the ALL keyword spelled out.
select all ssex from student;

-- Deduplicated results.
select distinct ssex from student;

select distinct
    ssex,
    sname,
    sbirth
from student;

-- Constant predicates: one that is always false, one that is always true.
select * from student where 1 > 2;
select * from student where 1 = 1;

-- Filter on an exact name.
select * from student where sname = "李勇";

-- Filter on a function of a column.
select * from student where length(sname) > 2;

-- sid values above 95005, grouped by sid.
select sid
from student
where sid > 95005
group by sid;


-- 4. Aggregate functions
-- NOTE(review): the original comments here described a US COVID-19 dataset
-- (counties, deaths, cases), but the queries run against student; these
-- comments describe what each query actually does.
-- Number of student rows with a non-null sid.
select count(sid) from student;
-- Same count, restricted to rows where sname is "李峰".
select count(sid) from student where sname = "李峰";
-- Sum of sid over rows where sname is "李峰".
select sum(sid) from student where sname = "李峰";
-- Commented-out example for the highest confirmed-case count:
-- select max(cases) from t_usa_covid19;



-- --5、GROUP BY
-- --根据state州进行分组 统计每个州有多少个县county
-- select count(county) from student where count_date = "2021-01-28" group by state;
--
-- --想看一下统计的结果是属于哪一个州的
-- select state,count(county) from student where count_date = "2021-01-28" group by state;
--
-- --再想看一下每个县的死亡病例数，我们猜想很简单呀	把deaths字段加上返回	真实情况如何呢？
-- select state,count(county),deaths from student where count_date = "2021-01-28" group by state;
-- --很尴尬 sql报错了org.apache.hadoop.hive.ql.parse.SemanticException:Line 1:27 Expression not in GROUP BY key 'deaths'
--
-- --为什么会报错？？group by的语法限制
-- --结论：出现在GROUP BY中select_expr的字段：要么是GROUP BY分组的字段；要么是被聚合函数应用的字段。
-- --deaths不是分组字段 报错
-- --state是分组字段 可以直接出现在select_expr中
--
-- --被聚合函数应用
-- select state,count(county),sum(deaths) from t_usa_covid19 where count_date = "2021-01-28" group by state;


------------ String Functions ------------
-- Length of a string.
select length("itcast");
-- Reverse a string.
select reverse("itcast");
-- Concatenate strings.
select concat("angela", "baby");
-- Concatenate with a separator: concat_ws(separator, [string | array(string)]+)
select concat_ws('.', 'www', array('itcast', 'cn'));
-- Substring: substr(str, pos[, len]) or substring(str, pos[, len]).
-- pos is a 1-based index; a negative pos counts from the end of the string.
select substr("angelababy", -2);
select substr("angelababy", 2, 2);
-- Split a string on a regex: split(str, regex)
select split('apache hive', ' ');



----------- Date Functions -----------------
-- Current date: current_date
select current_date();
-- Current UNIX timestamp: unix_timestamp
select unix_timestamp();
-- Date string to UNIX timestamp: unix_timestamp
select unix_timestamp("2011-12-07 13:01:03");
-- Date string in a given format to UNIX timestamp: unix_timestamp
select unix_timestamp('20111207 13:01:03', 'yyyyMMdd HH:mm:ss');
-- UNIX timestamp to date string: from_unixtime
select from_unixtime(1618238391);
select from_unixtime(0, 'yyyy-MM-dd HH:mm:ss');
-- Date comparison: datediff; arguments must be formatted as
-- 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd'
select datediff('2012-12-08', '2012-05-09');
-- Add days to a date: date_add
select date_add('2012-02-28', 10);
-- Subtract days from a date: date_sub
-- The literal is zero-padded so it matches the 'yyyy-MM-dd' format and the
-- result does not depend on lenient date parsing.
select date_sub('2012-01-01', 10);




---- Mathematical Functions -------------
-- round(a): returns the integer part as a double, rounding half up.
select round(3.1415926);
-- round(double a, int d): returns a double rounded to d decimal places.
select round(3.1415926, 4);
-- rand(): a random number between 0 and 1, different on every execution.
select rand();
-- rand(int seed): a seeded call yields a stable random sequence.
select rand(3);



----- Conditional Functions ------------------
-- Uses the student table created in an earlier lesson.
-- NOTE(review): other queries in these notes read this column as ssex, while
-- the queries below use sex — confirm which student schema is intended.
select *
from student
limit 3;

-- Conditional: if(boolean testCondition, T valueTrue, T valueFalseOrNull)
select if(1 = 2, 100, 200);

select if(sex = '男', 'M', 'W')
from student
limit 3;

-- Null substitution: nvl(T value, T default_value)
select nvl("allen", "itcast");
select nvl(null, "itcast");

-- Value mapping: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END
select
    case 100
        when 50 then 'tom'
        when 100 then 'mary'
        else 'tim'
    end;

select
    case sex
        when '男' then 'male'
        else 'female'
    end
from student
limit 3;


-- Access records (group, day, pv) stored as comma-delimited text and
-- populated from a local file.
create table user_access (
    group_id   string,
    createtime string,  -- day
    pv         int
)
row format delimited
    fields terminated by ',';

load data local inpath '/export/data/hivedata/group_access.txt'
    into table user_access;


-- Rank rows within each createtime partition by pv, highest first, showing
-- the three ranking functions side by side:
--   rn1  rank()        ties share a rank and leave gaps after them
--   rn2  dense_rank()  ties share a rank, with no gaps
--   rn3  row_number()  every row gets a distinct number
select
    group_id,
    createtime,
    pv,
    rank()       over (partition by createtime order by pv desc) as rn1,
    dense_rank() over (partition by createtime order by pv desc) as rn2,
    row_number() over (partition by createtime order by pv desc) as rn3
from user_access;

# Import the MySQL table web_chat_ems_2019_07 into the Hive table
# ods_edu.web_chat_ems_2019_07.
# NOTE(review): the JDBC URL names no source database — confirm and append it.
# NOTE(review): the password is passed in plain text on the command line;
# consider -P or --password-file.
sqoop import \
    --connect jdbc:mysql://node1:3306/ \
    --username root \
    --password 123456 \
    --table web_chat_ems_2019_07 \
    --warehouse-dir /user/hive/warehouse \
    --hive-database ods_edu \
    --hive-import \
    --hive-table web_chat_ems_2019_07