[关闭]
@songlaf 2016-05-26T20:04:14.000000Z 字数 1474 阅读 587

作业十八【Hive日志分析案例】

北风网大数据培训


一) 建立Hive表

  1. #建立数据库
  2. create database web;
  3. #建立表
  4. create table track_log(
  5. id string,
  6. url string,
  7. referer string,
  8. keyword string,
  9. type string,
  10. guid string,
  11. pageId string,
  12. moduleId string,
  13. linkId string,
  14. attachedInfo string,
  15. sessionId string,
  16. trackerU string,
  17. trackerType string,
  18. ip string,
  19. trackerSrc string,
  20. cookie string,
  21. orderCode string,
  22. trackTime string,
  23. endUserId string,
  24. firstLink string,
  25. sessionViewNo string,
  26. productId string,
  27. curMerchantId string,
  28. provinceId string,
  29. cityId string,
  30. fee string,
  31. edmActivity string,
  32. edmEmail string,
  33. edmJobId string,
  34. ieVersion string,
  35. platform string,
  36. internalKeyword string,
  37. resultSum string,
  38. currentPage string,
  39. linkPosition string,
  40. buttonPosition string
  41. )
  42. partitioned by (date string,hour string)
  43. row format delimited fields terminated by '\t' ;

二) 导入数据到Hive

  1. load data local inpath '/home/sjf/2015082818' into table track_log partition(date='20150828',hour='18') ;
  2. load data local inpath '/home/sjf/2015082819' into table track_log partition(date='20150828',hour='19') ;

三)统计结果

  1. #建立hive临时表
  2. create table track_log_tmp(
  3. date string,
  4. hour string,
  5. pv string,
  6. uv string
  7. )
  8. row format delimited fields terminated by '\t';
  9. #查询统计数据
  10. insert into table track_log_tmp select date,hour,count(url),count(distinct guid) from track_log group by date,hour ;

四)数据导入Mysql

  1. #建立Mysql表
  2. create table track_log(
  3. date varchar(20),
  4. hour varchar(10),
  5. pv varchar(50),
  6. uv varchar(50),
  7. primary key (date,hour)
  8. ) ;
  1. #导入数据到Mysql
  2. bin/sqoop export \
  3. --connect jdbc:mysql://njt.song.s0:3306/web \
  4. --username root \
  5. --password 123456 \
  6. --table track_log \
  7. --export-dir /user/hive/warehouse/track_log_tmp \
  8. --input-fields-terminated-by '\t'

执行结果:
3333333333.png-15.5kB

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注