@fanxy
2018-03-06T18:07:53.000000Z
字数 8489
阅读 2659
樊潇彦
复旦大学经济学院
中级宏观
# 安装程序包
install.packages("dplyr") # 读取和整理数据
install.packages("tidyr")
install.packages("data.table")
install.packages("foreign")
install.packages("readstata13")
install.packages("haven")
install.packages("ggplot2") # 作图
install.packages("ggrepel")
install.packages("dygraphs")
install.packages("plotrix")
install.packages("lubridate") # 处理时序数据
install.packages("zoo")
install.packages("mFilter")
# 调用程序包
library(dplyr)
library(tidyr)
library(data.table)
library(foreign)
library(readstata13)
library(haven) # 有空字符时,用write_dta生成.dta
library(ggplot2)
library(ggrepel)
library(dygraphs) # 动态作图
library(plotrix) # 双轴作图
library(lubridate) # 可以用year()、month()等提取日期
library(zoo) # 处理时序数据
library(mFilter) # HP滤波
# 设定工作目录
setwd("D:\\...") # 设定工作目录
rm(list=ls()) # 清内存
# 数据读取和整理
gdp=fread("GDP.csv",header=T) # 读取数据
str(gdp) # 查看数据结构
gdp_var=gdp%>%select(varid,varname,var) # 指标一览
gdp=gdp%>%
select(-varid,-varname)%>% # 选择除varid和varname之外的所有指标
gather(year,value,-var)%>% # 将列指标1952-2015按年整合
mutate(year=as.numeric(year))%>% # 将字符型指标转换为数字型
filter(!is.na(value))%>% # 选取value不为空值的样本
arrange(var,year)%>% # 按var和year排序
spread(var,value) # 按var展开为列指标
head(gdp) # 查看前6个样本
# 计算对数实际人均GDP并分解
gdppa=gdp%>%
select(year,gdppa,gdppaid)%>%
mutate(gdppaid=ifelse(year==1952,100,gdppaid)) %>%
mutate(realgdppa_1952=gdp$gdppa[gdp$year==1952]*cumprod(gdppaid/100))%>%
mutate(realgdppa_2015=realgdppa_1952/realgdppa_1952[gdp$year==2015]*gdppa[gdp$year==2015])
gdppa_hp=hpfilter(log(gdppa$realgdppa_2015),drift=F, # 用hp滤波提取趋势和周期
freq=100,type="lambda") # 年度数据lambda=100,季度和月度分别为1600和14400
gdppa_hp=data.frame(year=gdppa$year, trend=gdppa_hp$trend,cycle=gdppa_hp$cycle)
twoord.plot(lx = gdppa_hp$year, ly = gdppa_hp$trend,
rx = gdppa_hp$year, ry = gdppa_hp$cycle,
main = '对数实际人均GDP:1952-2015', xlab = '',
ylab = '趋势', rylab = '周期',
type = c('line','line'))
# 生产结构和支出结构
gdpstr=gdp%>%
mutate(sec1=gdp1/gdp)%>%
mutate(sec2=gdp2/gdp)%>%
mutate(sec3=gdp3/gdp)%>%
mutate(con=gdpexpcpri/gdpexp)%>%
mutate(inv=gdpexpi/gdpexp)%>%
mutate(gov=gdpexpcgov/gdpexp)%>%
select(year,sec1:gov)%>%
gather(var,share,-year)
ggplot(gdpstr[gdpstr$var %in% c("sec1","sec2","sec3"),],aes(year,share,color=var))+
geom_line(size=1)+ labs(title="生产法GDP结构",x="",y="")+
scale_colour_discrete(labels = c('第一产业','第二产业','第三产业'))+
guides(color = guide_legend(title = NULL)) + theme_bw()+ theme(legend.position = 'bottom')
ggplot(gdpstr[gdpstr$var %in% c("con","inv","gov"),],aes(year, share, color=var))+
geom_line(size=1)+ labs(title="支出法GDP结构",x="",y="")+
scale_colour_discrete(labels = c('居民消费','政府支出','资本形成'))+
guides(color = guide_legend(title = NULL)) + theme_bw()+
theme(legend.position = 'bottom')
# 收入结构
gdpinc=fread("GDP_inc.csv",header=T)
gdpinc_var=gdpinc%>% select(varid,reg,var)
reginfo=data.frame(reg=unique(gdpinc_var$reg))
str(reginfo)
reginfo=reginfo%>%
mutate(reg=as.character(reg))%>%
mutate(regno=31:1)%>%arrange(regno)%>%
mutate(reg6=rep(c("华北","东北","华东","华南","西南","西北"),times=c(5,3,7,6,5,5)))%>%
mutate(reg6no=rep(c(10,20,30,40,50,60),times=c(5,3,7,6,5,5)))
gdpinc=gdpinc%>%
select(-varid)%>%
gather(year,value,-reg,-var)%>%
spread(var,value)
gdpreg_inc=gdpinc%>% # 计算分省的收入结构
mutate(dep=`固定资产折旧`/`收入法`)%>%
mutate(lab=`劳动者报酬`/`收入法`)%>%
mutate(tax=`生产税净额`/`收入法`)%>%
mutate(pro=`营业盈余`/`收入法`)%>%
select(reg,year,dep:pro)%>%
gather(var,share,-reg,-year)
a=gdpinc%>% # 计算全国的
group_by(year)%>%
mutate(dep=sum(`固定资产折旧`)/sum(`收入法`))%>%
mutate(lab=sum(`劳动者报酬`)/sum(`收入法`))%>%
mutate(tax=sum(`生产税净额`)/sum(`收入法`))%>%
mutate(pro=sum(`营业盈余`)/sum(`收入法`))%>%
select(reg,year,dep:pro)%>%
filter(reg=="北京")%>%
mutate(reg="全国")%>%
gather(var,share,-reg,-year)
a=data.frame(a) # 将数据格式设定为data.frame
gdpinc=rbind(gdpreg_inc,a) # 省与全国合并
gdpinc=gdpinc%>%
left_join(reginfo,by="reg")%>%
mutate(regno=ifelse(reg=="全国",0,regno))%>%
mutate(reg6no=ifelse(reg=="全国",0,reg6no))%>%
mutate(reg6=ifelse(reg=="全国",reg,reg6))%>%
arrange(var,year,regno)
ggplot(gdpinc[gdpinc$regno==0,],aes(year,share,group=var,color=var))+geom_line(size=1)+
labs(title="收入法GDP结构",x="",y="")+
scale_colour_manual(values= c("dep"="grey","lab"="blue","pro"="red","tax"="purple"),
labels = c('固定资产折旧','劳动者报酬','营业盈余','生产税净额'))+
guides(color = guide_legend(title = NULL)) +
theme_bw()+ theme(legend.position = 'bottom')
gdpreg=fread("GDP_reg.csv",header=T)
gdpreg_var=gdpreg%>%select(varid,var,reg)
gdppareg=gdpreg%>%
filter(var=="GDP指数")%>%
select(-varid,-var)%>%
gather(year,idx,-reg)%>%
arrange(reg,year)%>%
group_by(reg)%>%
mutate(gdpidx=cumprod(idx/100))
gdppa1978=data.frame(regno=1:31,reg=gdpreg$reg[gdpreg$var=="人均GDP"],
gdppa=gdpreg$`1978`[gdpreg$var=="人均GDP"])
a=merge(gdppareg,gdppa1978,by="reg",all.x=T)%>%
arrange(regno,year)%>%
filter(year==1978)%>%
rename(gdpidx1978=gdpidx)%>%
rename(gdppa1978=gdppa)%>%
select(reg,regno,gdpidx1978,gdppa1978)
gdppareg=merge(gdppareg,a,by="reg",all.x=T)
gdppareg=gdppareg%>%
mutate(gdppa=gdpidx/gdpidx1978*gdppa1978)%>%
select(year,regno,reg,gdppa)%>%
group_by(year)%>%
mutate(gdppa_rel=gdppa/max(gdppa))%>%
arrange(year,regno)
table(gdppareg$reg[gdppareg$gdppa_rel==1]) # 检查每年人均GDP最高的地区,全是上海
g_level=gdppareg%>%
filter(year %in% c(1978,2015))%>%
select(-gdppa)%>%
spread(year,gdppa_rel)%>%
mutate(rank78=n()+1-min_rank(`1978`))%>%
mutate(rank15=n()+1-min_rank(`2015`))%>%
mutate(top5=ifelse(rank78<=6 | rank15<=6, reg, ""))
ggplot(g_level,aes(`1978`,`2015`,color=regno))+geom_point()+
geom_abline(intercept = 0, slope = 1, color="red")+
geom_text(aes(`1978`,`2015`, label=top5, hjust=0.5, vjust=-0.5))+
labs(title = "各地区实际人均GDP相对于上海的变化",x="1978年相对于上海",y="2015年相对于上海")+
theme_bw()
g_gr=gdppareg%>%
filter(year %in% c(1978,1996,1997,2015))%>%
select(-gdppa_rel)%>%
spread(year,gdppa)%>%
mutate(gr7896=exp(log(`1996`/`1978`)/18)-1)%>%
mutate(gr9715=exp(log(`2015`/`1997`)/18)-1)%>%
mutate(rank1=n()+1-min_rank(gr7896))%>%
mutate(rank2=n()+1-min_rank(gr9715))%>%
mutate(top5=ifelse(rank1<=5 | rank2<=5, reg, ""))%>%
select(regno,reg,gr7896,gr9715,top5)
ggplot(g_gr,aes(gr7896,gr9715,color=regno))+geom_point()+
geom_abline(intercept = 0, slope = 1, color="red")+
geom_text_repel(aes(label = reg))+
ylim(0.06,0.15) +xlim(0.06,0.15)+
labs(title = "各地区实际人均GDP增长率的变化",x="1978-1996年",y="1997-2015年")+
theme_bw()
g_conv=gdppareg%>%
filter(year %in% c(1978,2015))%>%
select(-gdppa_rel)%>%
spread(year,gdppa)%>%
mutate(gr=exp(log(`2015`/`1978`)/37)-1)%>%
mutate(rank=n()+1-min_rank(gr))%>%
mutate(top5=ifelse(rank<=5, reg, ""))%>%
select(regno,reg,`1978`,gr,top5)
ggplot(g_conv,aes(log(`1978`),gr,color=regno))+geom_point()+geom_smooth(method="lm")+
geom_text_repel(aes(label = reg))+
labs(title = "地区收敛(含直辖市)",x="1978年对数实际GDP",y="1978-2015年年均增长率")+
theme_bw()
ggplot(g_conv[!g_conv$reg %in% c("北京","天津","上海","重庆"),],
aes(log(`1978`),gr,color=regno))+geom_point()+geom_smooth(method="lm")+
geom_text_repel(aes(label = reg))+
labs(title = "地区收敛(不含直辖市)",x="1978年对数实际GDP",y="1978-2015年年均增长率")+
theme_bw()
reg=lm(gr~log(`1978`),data=g_conv) # OLS估计
summary(reg)
# 产出
gdpstrreg1=gdpreg%>%
select(-varid)%>%
filter(var %in% c("GDP","第一产业","第二产业","第三产业"))%>%
gather(year,value,-reg,-var)%>%
filter(year>=1978)%>%
spread(var,value)%>%
mutate(Sector1=`第一产业`/GDP)%>%
mutate(Sector2=`第二产业`/GDP)%>%
mutate(Sector3=`第三产业`/GDP)%>%
select(year,reg,Sector1:Sector3)%>%
gather(var,share,-reg,-year)%>%
left_join(reginfo,by="reg")%>%
mutate(year=as.numeric(year))%>%
arrange(regno,year,var)
ggplot(gdpstrreg1,aes(year,share,color=reg6))+geom_point()+facet_wrap(~var,ncol=3)+
scale_x_continuous(breaks=seq(1980, 2015, 5))+
guides(color = guide_legend(title = NULL)) +
labs(title="各地区产业结构变动:1978-2015",x="",y="")+
theme_bw()+ theme(legend.position = 'bottom')
# 支出
gdpstrreg2=gdpreg%>%
select(-varid)%>%
filter(var %in% c("支出法","最终消费支出","资本形成总额"))%>%
gather(year,value,-reg,-var)%>%
filter(year>=1993 & year<=2014)%>%
spread(var,value)%>%
mutate(`最终消费`=`最终消费支出`/`支出法`)%>%
mutate(`资本形成`=`资本形成总额`/`支出法`)%>%
select(year,reg,`最终消费`,`资本形成`)%>%
gather(var,share,-reg,-year)%>%
left_join(reginfo,by="reg")%>%
mutate(year=as.numeric(year))%>%
arrange(regno,year,var)
ggplot(gdpstrreg2,aes(year,share,color=reg6))+geom_point()+facet_wrap(~var,ncol=2)+
scale_x_continuous(breaks=seq(1995, 2015, 5))+
guides(color = guide_legend(title = NULL))+
labs(title="各地区支出结构变动:1993-2014",x="",y="")+
theme_bw()+ theme(legend.position = 'bottom')
# 收入
gdpstrreg3=gdpinc%>%
select(regno,year,var,share)%>%
filter(regno>0)%>%
spread(var,share)%>%
rename(`固定资产折旧`=dep)%>%
rename(`劳动者报酬`=lab)%>%
rename(`营业盈余`=pro)%>%
rename(`生产税净额`=tax)%>%
gather(var,share,-regno,-year)%>%
mutate(year=as.numeric(year))%>%
left_join(reginfo,by="regno")
ggplot(gdpstrreg3,aes(year,share,color=reg6))+geom_point()+facet_wrap(~var,ncol=2)+
scale_x_continuous(breaks=seq(1995, 2015, 5))+
guides(color = guide_legend(title = NULL))+
labs(title="各地区收入结构变动:1993-2014",x="",y="")+
theme_bw()+ theme(legend.position = 'bottom')