[关闭]
@zhangyy 2017-07-22T17:39:02.000000Z 字数 1677 阅读 154

hive 的数据案例分析二

hive的部分


  • 根据hive的案例一增加需求

一: 增加案例需求:

  1. 统计 pv , uv , 登录人数 , 游客人数 , 平均访问时长 , 二跳率 , 独立IP
  2. 用一张表去处理

  1. show partitions track_log ;

image_1akacgkv21q8015ugmrq1l1fpea9.png-7.2kB

  1. create table session_info(
  2. session_id string,
  3. guid string,
  4. trackerU string,
  5. landing_url string,
  6. landing_url_ref string,
  7. user_id string,
  8. pv string,
  9. stay_time string,
  10. min_trackTime string,
  11. ip string,
  12. provinceId string
  13. )
  14. partitioned by (date string)
  15. row format delimited fields terminated by '\t' ;

image_1akaflsmn1pca1ll4kcu107hp021g.png-23.6kB

  1. create table session_info_tmp1 as select
  2. sessionId session_id,
  3. max(guid) guid,
  4. --trackerU string,
  5. --landing_url string,
  6. --landing_url_ref string,
  7. max(endUserId) user_id,
  8. count(url) pv,
  9. (max(unix_timestamp(trackTime)) - min(unix_timestamp(trackTime))) stay_time,
  10. min(trackTime) min_trackTime ,
  11. max(ip) ip,
  12. max(provinceId) provinceId
  13. from track_log where date='20150828'
  14. group by sessionId ;

image_1akafgfel12phthja53a4f14kvm.png-36.6kB

  1. create table session_info_tmp2 as select
  2. sessionId session_id,
  3. trackTime trackTime,
  4. trackeru trackerU,
  5. url landing_url,
  6. referer landing_url_ref
  7. from track_log where date='20150828' ;

image_1akafk8g1tpr18ap4bp1h32cn913.png-60.8kB

  1. insert overwrite table session_info partition (date='20150828')
  2. select
  3. a.session_id,
  4. a.guid,
  5. b.trackerU,
  6. b.landing_url,
  7. b.landing_url_ref,
  8. a.user_id,
  9. a.pv,
  10. a.stay_time,
  11. a.min_trackTime,
  12. a.ip,
  13. a.provinceId
  14. from session_info_tmp1 a join session_info_tmp2 b
  15. on a.session_id=b.session_id
  16. and a.min_trackTime=b.trackTime ;

image_1akafs6ipugb1p5o1vogvhpnoo1t.png-46.3kB

  1. create table vistor_users_info as
  2. select
  3. date,
  4. count(distinct guid) UV,
  5. sum(pv) PV,
  6. count(case when user_id != '' then user_id else null end) login_users,
  7. count(case when user_id = '' then user_id else null end) vistor_users,
  8. avg(stay_time) avg_stay_time,
  9. count(case when pv>=2 then session_id else null end)/count(session_id) sec_ratio,
  10. count(distinct ip) ip
  11. from session_info where date='20150828'
  12. group by date ;

image_1akag6l92upa17uf1ba11aii4ak2a.png-41.3kB

  1. select * from vistor_users_info;

image_1akag8pme1dspe58uli1ejvh602n.png-19.8kB

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注