@zhangyy
2018-03-29T14:49:44.000000Z
The Hive part

Based on the log file and its fields, compute and analyze the PV and UV for each hour of each day:

- Create a Hive table whose column delimiter matches the source file
- Load the data into the Hive table
- Write Hive SQL to compute the statistics and write the results into a second Hive table
- Export the results from that second Hive table to a MySQL table
First create a working database and the source table. The table is partitioned by date and hour so each hourly log file gets its own partition, and the field delimiter is tab to match the raw file:

```sql
create database db_track;
use db_track;

create table track_log(
    id string,
    url string,
    referer string,
    keyword string,
    type string,
    guid string,
    pageId string,
    moduleId string,
    linkId string,
    attachedInfo string,
    sessionId string,
    trackerU string,
    trackerType string,
    ip string,
    trackerSrc string,
    cookie string,
    orderCode string,
    trackTime string,
    endUserId string,
    firstLink string,
    sessionViewNo string,
    productId string,
    curMerchantId string,
    provinceId string,
    cityId string,
    fee string,
    edmActivity string,
    edmEmail string,
    edmJobId string,
    ieVersion string,
    platform string,
    internalKeyword string,
    resultSum string,
    currentPage string,
    linkPosition string,
    buttonPosition string
)
partitioned by (date string, hour string)
row format delimited fields terminated by '\t';
```
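Because the table is partitioned by (date, hour), a query that filters on both columns scans only the matching partition directories instead of the whole table. An illustrative query (not part of the pipeline itself):

```sql
-- reads only the date=20150828/hour=18 partition directory
select id, url, guid
from track_log
where date='20150828' and hour='18'
limit 10;
```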
Load the two hourly log files from the local filesystem into their partitions:

```sql
load data local inpath '/home/hadoop/2015082818' into table track_log partition(date='20150828', hour='18');
load data local inpath '/home/hadoop/2015082819' into table track_log partition(date='20150828', hour='19');
```
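A quick sanity check (standard Hive statements, added here for verification) confirms that both partitions registered and contain rows:

```sql
show partitions track_log;
-- expected: date=20150828/hour=18 and date=20150828/hour=19

-- row count for one partition
select count(*) from track_log where date='20150828' and hour='18';
```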
PV (page views) is the number of url hits and UV (unique visitors) is the number of distinct guid values; a quick per-hour check:

```sql
select date, hour, count(url) pv, count(distinct guid) uv
from track_log
where date='20150828'
group by date, hour;
```
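One caveat: in older Hive versions `count(distinct guid)` funnels through a single reducer, which can be slow on large partitions. A common rewrite, sketched below, deduplicates first and then counts; whether it actually helps depends on your Hive version and data volume:

```sql
-- dedupe guid per hour first, then count rows: same UV result,
-- but the heavy group-by parallelizes across reducers
select date, hour, count(*) as uv
from (
    select date, hour, guid
    from track_log
    where date='20150828'
    group by date, hour, guid
) t
group by date, hour;
```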
Create a second Hive table to hold the per-hour results, again tab-delimited so the exported files can be parsed later:

```sql
create table track_log_tmp(
    date string,
    hour string,
    pv string,
    uv string
)
row format delimited fields terminated by '\t';
```
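The Sqoop step at the end reads this table's files directly from HDFS, so it is worth confirming where Hive actually stored them; the Location shown must match the `--export-dir` used below:

```sql
-- Location should be /user/hive/warehouse/db_track.db/track_log_tmp
-- (assuming the default warehouse directory)
desc formatted track_log_tmp;
```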
Populate it with the aggregated results:

```sql
insert into table track_log_tmp
select date, hour, count(url), count(distinct guid)
from track_log
where date='20150828'
group by date, hour;
```
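Note that `insert into` appends, so re-running the statement would duplicate the result rows. If the job may be re-executed, the overwrite form is the idempotent alternative:

```sql
-- replaces the table contents instead of appending
insert overwrite table track_log_tmp
select date, hour, count(url), count(distinct guid)
from track_log
where date='20150828'
group by date, hour;
```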
On the MySQL side, create the target table that Sqoop will export into (run this in MySQL, not Hive; it happens to share the name of the Hive source table):

```sql
create table track_log(
    date varchar(20),
    hour varchar(10),
    pv varchar(50),
    uv varchar(50),
    primary key (date, hour)
);
```
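Because of the primary key on (date, hour), re-running the export against rows that already exist will fail with duplicate-key errors. Clearing the table first is the simplest workaround (Sqoop's `--update-key`/`--update-mode` options are another):

```sql
-- run in MySQL before re-exporting
truncate table track_log;
```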
Finally, export the result table's HDFS directory to MySQL with Sqoop, telling it the files are tab-delimited:

```bash
bin/sqoop export \
--connect jdbc:mysql://namenode01.hadoop.com:3306/yangyang \
--username root \
--password 123456 \
--table track_log \
--export-dir /user/hive/warehouse/db_track.db/track_log_tmp \
--input-fields-terminated-by '\t'
```
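Once the export finishes, a quick query in MySQL confirms the rows arrived:

```sql
-- run in MySQL
select * from yangyang.track_log;
```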