@fanxy
2018-03-06T10:07:53.000000Z
字数 8489
阅读 2966
樊潇彦 复旦大学经济学院 中级宏观
# 安装程序包install.packages("dplyr") # 读取和整理数据install.packages("tidyr")install.packages("data.table")install.packages("foreign")install.packages("readstata13")install.packages("haven")install.packages("ggplot2") # 作图install.packages("ggrepel")install.packages("dygraphs")install.packages("plotrix")install.packages("lubridate") # 处理时序数据install.packages("zoo")install.packages("mFilter")# 调用程序包library(dplyr)library(tidyr)library(data.table)library(foreign)library(readstata13)library(haven) # 有空字符时,用write_dta生成.dtalibrary(ggplot2)library(ggrepel)library(dygraphs) # 动态作图library(plotrix) # 双轴作图library(lubridate) # 可以用year()、month()等提取日期library(zoo) # 处理时序数据library(mFilter) # HP滤波# 设定工作目录setwd("D:\\...") # 设定工作目录rm(list=ls()) # 清内存
# 数据读取和整理gdp=fread("GDP.csv",header=T) # 读取数据str(gdp) # 查看数据结构gdp_var=gdp%>%select(varid,varname,var) # 指标一览gdp=gdp%>%select(-varid,-varname)%>% # 选择除varid和varname之外的所有指标gather(year,value,-var)%>% # 将列指标1952-2015按年整合mutate(year=as.numeric(year))%>% # 将字符型指标转换为数字型filter(!is.na(value))%>% # 选取value不为空值的样本arrange(var,year)%>% # 按var和year排序spread(var,value) # 按var展开为列指标head(gdp) # 查看前6个样本
# 计算对数实际人均GDP并分解gdppa=gdp%>%select(year,gdppa,gdppaid)%>%mutate(gdppaid=ifelse(year==1952,100,gdppaid)) %>%mutate(realgdppa_1952=gdp$gdppa[gdp$year==1952]*cumprod(gdppaid/100))%>%mutate(realgdppa_2015=realgdppa_1952/realgdppa_1952[gdp$year==2015]*gdppa[gdp$year==2015])gdppa_hp=hpfilter(log(gdppa$realgdppa_2015),drift=F, # 用hp滤波提取趋势和周期freq=100,type="lambda") # 年度数据lambda=100,季度和月度分别为1600和14400gdppa_hp=data.frame(year=gdppa$year, trend=gdppa_hp$trend,cycle=gdppa_hp$cycle)twoord.plot(lx = gdppa_hp$year, ly = gdppa_hp$trend,rx = gdppa_hp$year, ry = gdppa_hp$cycle,main = '对数实际人均GDP:1952-2015', xlab = '',ylab = '趋势', rylab = '周期',type = c('line','line'))
# 生产结构和支出结构gdpstr=gdp%>%mutate(sec1=gdp1/gdp)%>%mutate(sec2=gdp2/gdp)%>%mutate(sec3=gdp3/gdp)%>%mutate(con=gdpexpcpri/gdpexp)%>%mutate(inv=gdpexpi/gdpexp)%>%mutate(gov=gdpexpcgov/gdpexp)%>%select(year,sec1:gov)%>%gather(var,share,-year)ggplot(gdpstr[gdpstr$var %in% c("sec1","sec2","sec3"),],aes(year,share,color=var))+geom_line(size=1)+ labs(title="生产法GDP结构",x="",y="")+scale_colour_discrete(labels = c('第一产业','第二产业','第三产业'))+guides(color = guide_legend(title = NULL)) + theme_bw()+ theme(legend.position = 'bottom')ggplot(gdpstr[gdpstr$var %in% c("con","inv","gov"),],aes(year, share, color=var))+geom_line(size=1)+ labs(title="支出法GDP结构",x="",y="")+scale_colour_discrete(labels = c('居民消费','政府支出','资本形成'))+guides(color = guide_legend(title = NULL)) + theme_bw()+theme(legend.position = 'bottom')# 收入结构gdpinc=fread("GDP_inc.csv",header=T)gdpinc_var=gdpinc%>% select(varid,reg,var)reginfo=data.frame(reg=unique(gdpinc_var$reg))str(reginfo)reginfo=reginfo%>%mutate(reg=as.character(reg))%>%mutate(regno=31:1)%>%arrange(regno)%>%mutate(reg6=rep(c("华北","东北","华东","华南","西南","西北"),times=c(5,3,7,6,5,5)))%>%mutate(reg6no=rep(c(10,20,30,40,50,60),times=c(5,3,7,6,5,5)))gdpinc=gdpinc%>%select(-varid)%>%gather(year,value,-reg,-var)%>%spread(var,value)gdpreg_inc=gdpinc%>% # 计算分省的收入结构mutate(dep=`固定资产折旧`/`收入法`)%>%mutate(lab=`劳动者报酬`/`收入法`)%>%mutate(tax=`生产税净额`/`收入法`)%>%mutate(pro=`营业盈余`/`收入法`)%>%select(reg,year,dep:pro)%>%gather(var,share,-reg,-year)a=gdpinc%>% # 计算全国的group_by(year)%>%mutate(dep=sum(`固定资产折旧`)/sum(`收入法`))%>%mutate(lab=sum(`劳动者报酬`)/sum(`收入法`))%>%mutate(tax=sum(`生产税净额`)/sum(`收入法`))%>%mutate(pro=sum(`营业盈余`)/sum(`收入法`))%>%select(reg,year,dep:pro)%>%filter(reg=="北京")%>%mutate(reg="全国")%>%gather(var,share,-reg,-year)a=data.frame(a) # 将数据格式设定为data.framegdpinc=rbind(gdpreg_inc,a) # 省与全国合并gdpinc=gdpinc%>%left_join(reginfo,by="reg")%>%mutate(regno=ifelse(reg=="全国",0,regno))%>%mutate(reg6no=ifelse(reg=="全国",0,reg6no))%>%mutate(reg6=ifelse(reg=="全国",reg,reg6))%>%arrange(var,year,regno)ggplot(gdpinc[gdpinc$regno==0,],aes(year,share,group=var,color=var))+geom_line(size=1)+labs(title="收入法GDP结构",x="",y="")+scale_colour_manual(values= c("dep"="grey","lab"="blue","pro"="red","tax"="purple"),labels = c('固定资产折旧','劳动者报酬','营业盈余','生产税净额'))+guides(color = guide_legend(title = NULL)) +theme_bw()+ theme(legend.position = 'bottom')
gdpreg=fread("GDP_reg.csv",header=T)gdpreg_var=gdpreg%>%select(varid,var,reg)
gdppareg=gdpreg%>%filter(var=="GDP指数")%>%select(-varid,-var)%>%gather(year,idx,-reg)%>%arrange(reg,year)%>%group_by(reg)%>%mutate(gdpidx=cumprod(idx/100))gdppa1978=data.frame(regno=1:31,reg=gdpreg$reg[gdpreg$var=="人均GDP"],gdppa=gdpreg$`1978`[gdpreg$var=="人均GDP"])a=merge(gdppareg,gdppa1978,by="reg",all.x=T)%>%arrange(regno,year)%>%filter(year==1978)%>%rename(gdpidx1978=gdpidx)%>%rename(gdppa1978=gdppa)%>%select(reg,regno,gdpidx1978,gdppa1978)gdppareg=merge(gdppareg,a,by="reg",all.x=T)gdppareg=gdppareg%>%mutate(gdppa=gdpidx/gdpidx1978*gdppa1978)%>%select(year,regno,reg,gdppa)%>%group_by(year)%>%mutate(gdppa_rel=gdppa/max(gdppa))%>%arrange(year,regno)table(gdppareg$reg[gdppareg$gdppa_rel==1]) # 检查每年人均GDP最高的地区,全是上海g_level=gdppareg%>%filter(year %in% c(1978,2015))%>%select(-gdppa)%>%spread(year,gdppa_rel)%>%mutate(rank78=n()+1-min_rank(`1978`))%>%mutate(rank15=n()+1-min_rank(`2015`))%>%mutate(top5=ifelse(rank78<=6 | rank15<=6, reg, ""))ggplot(g_level,aes(`1978`,`2015`,color=regno))+geom_point()+geom_abline(intercept = 0, slope = 1, color="red")+geom_text(aes(`1978`,`2015`, label=top5, hjust=0.5, vjust=-0.5))+labs(title = "各地区实际人均GDP相对于上海的变化",x="1978年相对于上海",y="2015年相对于上海")+theme_bw()g_gr=gdppareg%>%filter(year %in% c(1978,1996,1997,2015))%>%select(-gdppa_rel)%>%spread(year,gdppa)%>%mutate(gr7896=exp(log(`1996`/`1978`)/18)-1)%>%mutate(gr9715=exp(log(`2015`/`1997`)/18)-1)%>%mutate(rank1=n()+1-min_rank(gr7896))%>%mutate(rank2=n()+1-min_rank(gr9715))%>%mutate(top5=ifelse(rank1<=5 | rank2<=5, reg, ""))%>%select(regno,reg,gr7896,gr9715,top5)ggplot(g_gr,aes(gr7896,gr9715,color=regno))+geom_point()+geom_abline(intercept = 0, slope = 1, color="red")+geom_text_repel(aes(label = reg))+ylim(0.06,0.15) +xlim(0.06,0.15)+labs(title = "各地区实际人均GDP增长率的变化",x="1978-1996年",y="1997-2015年")+theme_bw()g_conv=gdppareg%>%filter(year %in% c(1978,2015))%>%select(-gdppa_rel)%>%spread(year,gdppa)%>%mutate(gr=exp(log(`2015`/`1978`)/37)-1)%>%mutate(rank=n()+1-min_rank(gr))%>%mutate(top5=ifelse(rank<=5, reg, ""))%>%select(regno,reg,`1978`,gr,top5)ggplot(g_conv,aes(log(`1978`),gr,color=regno))+geom_point()+geom_smooth(method="lm")+geom_text_repel(aes(label = reg))+labs(title = "地区收敛(含直辖市)",x="1978年对数实际GDP",y="1978-2015年年均增长率")+theme_bw()ggplot(g_conv[!g_conv$reg %in% c("北京","天津","上海","重庆"),],aes(log(`1978`),gr,color=regno))+geom_point()+geom_smooth(method="lm")+geom_text_repel(aes(label = reg))+labs(title = "地区收敛(不含直辖市)",x="1978年对数实际GDP",y="1978-2015年年均增长率")+theme_bw()reg=lm(gr~log(`1978`),data=g_conv) # OLS估计summary(reg)
# 产出gdpstrreg1=gdpreg%>%select(-varid)%>%filter(var %in% c("GDP","第一产业","第二产业","第三产业"))%>%gather(year,value,-reg,-var)%>%filter(year>=1978)%>%spread(var,value)%>%mutate(Sector1=`第一产业`/GDP)%>%mutate(Sector2=`第二产业`/GDP)%>%mutate(Sector3=`第三产业`/GDP)%>%select(year,reg,Sector1:Sector3)%>%gather(var,share,-reg,-year)%>%left_join(reginfo,by="reg")%>%mutate(year=as.numeric(year))%>%arrange(regno,year,var)ggplot(gdpstrreg1,aes(year,share,color=reg6))+geom_point()+facet_wrap(~var,ncol=3)+scale_x_continuous(breaks=seq(1980, 2015, 5))+guides(color = guide_legend(title = NULL)) +labs(title="各地区产业结构变动:1978-2015",x="",y="")+theme_bw()+ theme(legend.position = 'bottom')# 支出gdpstrreg2=gdpreg%>%select(-varid)%>%filter(var %in% c("支出法","最终消费支出","资本形成总额"))%>%gather(year,value,-reg,-var)%>%filter(year>=1993 & year<=2014)%>%spread(var,value)%>%mutate(`最终消费`=`最终消费支出`/`支出法`)%>%mutate(`资本形成`=`资本形成总额`/`支出法`)%>%select(year,reg,`最终消费`,`资本形成`)%>%gather(var,share,-reg,-year)%>%left_join(reginfo,by="reg")%>%mutate(year=as.numeric(year))%>%arrange(regno,year,var)ggplot(gdpstrreg2,aes(year,share,color=reg6))+geom_point()+facet_wrap(~var,ncol=2)+scale_x_continuous(breaks=seq(1995, 2015, 5))+guides(color = guide_legend(title = NULL))+labs(title="各地区支出结构变动:1993-2014",x="",y="")+theme_bw()+ theme(legend.position = 'bottom')# 收入gdpstrreg3=gdpinc%>%select(regno,year,var,share)%>%filter(regno>0)%>%spread(var,share)%>%rename(`固定资产折旧`=dep)%>%rename(`劳动者报酬`=lab)%>%rename(`营业盈余`=pro)%>%rename(`生产税净额`=tax)%>%gather(var,share,-regno,-year)%>%mutate(year=as.numeric(year))%>%left_join(reginfo,by="regno")ggplot(gdpstrreg3,aes(year,share,color=reg6))+geom_point()+facet_wrap(~var,ncol=2)+scale_x_continuous(breaks=seq(1995, 2015, 5))+guides(color = guide_legend(title = NULL))+labs(title="各地区收入结构变动:1993-2014",x="",y="")+theme_bw()+ theme(legend.position = 'bottom')