@awsekfozc
2015-12-03T23:04:03.000000Z
字数 2446
阅读 1545
Hive
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
说明:
LOCAL:加载本地文件
OVERWRITE:覆盖原有数据
PARTITION:分区数据
##加载HDFS上的文件
load data inpath '/user/hive/warehouse/testdb.db/emp_ext/emp.txt' into table emp ;
##加载本地文件
load data local inpath '/opt/datas/dept.txt' into table dept ;
##加载覆盖原有数据
load data local inpath '/opt/datas/dept.txt' overwrite into table dept ;
INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
说明:
- OVERWRITE:覆盖原有数据
- PARTITION:分区数据
- IF NOT EXISTS:如果不存在
- select_statement1 FROM from_statement:查询结果集
INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
hive (testdb)> create table testdb.emp_like like testdb.emp ;
##加载数据到表
hive (testdb)> insert into table testdb.emp_like select * from testdb.emp ;
##覆盖数据到表
hive (testdb)> insert OVERWRITE table testdb.emp_like select * from testdb.emp ;
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 [ROW FORMAT row_format] SELECT ... FROM ...
##保存到本地目录的文件中
insert overwrite local directory '/opt/datas/hivedata' select * from testdb.emp ;
##保存到本地目录的文件中,加入分隔符
insert overwrite local directory '/opt/datas/hivedata2'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
select * from testdb.emp ;
##保存数据到HDFs文件系统上
insert overwrite directory '/user/zc/hive/hivedata' select * from testdb.emp ;
##命令模式
bin/hive -e "select * from testdb.emp ;" > '/opt/datas/exp_emp_res.txt'
EXPORT TABLE tablename [PARTITION (part_column="value"[, ...])]
TO 'export_target_path'
说明:
PARTITION:到处分区数据
export_target_path:导出路径
IMPORT [[EXTERNAL] TABLE new_or_original_tablename [PARTITION (part_column="value"[, ...])]]
FROM 'source_path'
[LOCATION 'import_target_path']
说明:
EXTERNAL:导入外部表
new_or_original_tablename:导入后的表名
PARTITION:导入分区
source_path:数据路径
LOCATION:导入后表数据存放路径
##导出表数据
EXPORT TABLE testdb.emp TO '/user/zc/hive/datas/export/emp' ;
##导入表数据
IMPORT FROM '/user/zc/hive/datas/export/emp' ;
##导出分区表数据
EXPORT TABLE emp_partition PARTITION (month="201512",day="01") TO '/user/zc/hive/datas/export/emp_partition' ;
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[CLUSTER BY col_list
| [DISTRIBUTE BY col_list] [SORT BY col_list]
]
[LIMIT number]
说明:
- ALL:显示全部数据
- DISTINCT:去除重复数据
- select_expr:查询表达式
- table_reference:数据集
- where_condition:条件
- GROUP BY:全局排序
- SORT BY:每个reduce内部排序,全局不是排序
- DISTRIBUTE BY:类似MR中的partition,分区。结合SORT BY使用,放在SORT BY之前
- CLUSTER BY:distribute by 和 sort by 字段相同时,使用cluster by 代替
- LIMIT:只显示结果的number条数据
在此输入正文