# @fanxy
# 2016-09-19T20:20:14.000000Z
# 字数 8321
# 阅读 1821
# 樊潇彦
# 复旦大学经济学院
# 中级宏观
# 准备工作
# Install the add-on packages this script needs, but only the ones that are
# not already available, so re-running the script does not reinstall
# everything (and does not need network access once they are present).
required_pkgs <- c(
  "readxl",   # read Excel data
  "stringr",  # string handling
  "corrplot", # correlation plots
  "igraph",   # network graphs
  "forecast"  # seasonal adjustment
)
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(readxl) # 读取excel数据
library(stringr) # 字符串处理
library(corrplot)
library(igraph)
library(forecast)
library(stats) # 基础包,不用安装直接调用
library(dplyr)
library(tidyr)
library(data.table)
library(foreign)
library(readstata13)
library(haven)
library(ggplot2)
library(ggrepel)
library(dygraphs)
library(plotrix)
library(lubridate)
library(zoo)
library(mFilter)
# Point R at the data folder; the relative paths passed to read_excel() below
# are resolved against this directory.
# NOTE(review): "D:\\..." looks like a placeholder — replace it with the real
# folder before running, otherwise setwd() will fail.
setwd("D:\\...") # set the working directory
# Remove all (non-hidden) objects from the current environment so the analysis
# starts from a clean workspace. Attached packages are not affected.
rm(list=ls())
# National fiscal totals ----
# Read the "total" sheet (first row skipped; one date column followed by four
# numeric columns). `TRUE` is spelled out because `T` is an ordinary variable
# that can be reassigned.
total <- read_excel(
  "Ch03_Data.xls",
  sheet = "total",
  skip = 1,
  col_names = TRUE,
  col_types = c("date", rep("numeric", 4))
)

# GDP series from the Chapter 2 workbook; keep only rows that have a year.
gdp <- read_excel(
  "../Ch02/Ch02_Data.xlsx",
  col_names = TRUE,
  sheet = "gdp_idx"
) %>%
  filter(!is.na(year)) %>%
  select(year, GDP)

# Long table (year, var, share) holding the revenue/GDP and balance/GDP ratios
# plus the two central-government share columns, with missing values dropped.
tax_str <- total %>%
  mutate(year = year(`指标名称`)) %>%
  left_join(gdp, by = "year") %>%
  mutate(
    `国家财政收入/GDP` = `国家财政收入` / GDP,
    # NOTE(review): computed as revenue minus expenditure, so a shortfall
    # appears as a negative value — confirm this sign convention is intended.
    `国家财政赤字/GDP` = (`国家财政收入` - `国家财政支出`) / GDP
  ) %>%
  select(
    year,
    `国家财政收入/GDP`,
    `国家财政赤字/GDP`,
    `中央财政收入:占比`,
    `中央财政支出:占比`
  ) %>%
  arrange(year) %>%
  gather(var, share, -year) %>%
  filter(!is.na(share))
# Line chart of the selected series from a long (year, var, share) table.
# Dot-dash vertical rules are drawn at 1958, 1978 and 1984 (black) and at
# 1994 (red); the legend sits below the panel without a title.
plot_fiscal_series <- function(long_tbl, series, chart_title) {
  year_rule <- function(yr, rule_col) {
    geom_vline(xintercept = yr, linetype = "dotdash", col = rule_col)
  }
  picked <- long_tbl[long_tbl$var %in% series, ]
  ggplot(picked, aes(year, share, color = var)) +
    geom_line(size = 1) +
    year_rule(1958, "black") +
    year_rule(1978, "black") +
    year_rule(1984, "black") +
    year_rule(1994, "red") +
    labs(title = chart_title, x = "", y = "") +
    scale_x_continuous(breaks = c(1952, 1958, 1978, 1984, 1994, 2015)) +
    guides(color = guide_legend(title = NULL)) +
    theme_bw() +
    theme(legend.position = "bottom")
}

# Revenue and balance relative to GDP.
plot_fiscal_series(
  tax_str,
  c("国家财政收入/GDP", "国家财政赤字/GDP"),
  "国家财政收入与赤字占GDP的比重"
)
# Central government's share of revenue and of expenditure.
plot_fiscal_series(
  tax_str,
  c("中央财政收入:占比", "中央财政支出:占比"),
  "中央财政收入和支出占比(%)"
)
# Revenue / expenditure by category ----
# Read the "class" sheet: two text columns followed by 66 numeric columns,
# which are renamed to the years 1950-2015. `TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
class <- read_excel(
  "Ch03_Data.xls",
  sheet = "class",
  col_names = TRUE,
  col_types = c(rep("text", 2), rep("numeric", 66))
)
colnames(class) <- c("varid", "var", 1950:2015)

# Keep the id/name pairs of the original series for reference.
class_var <- class[, c(1, 2)]

# Reshape to one row per (series, year) and derive:
#   level - the first two characters of the series name
#   item  - the text after the last ":" in the series name, with a few long
#           names shortened
#   var   - the series name collapsed to one of three headings where it
#           matches the patterns below
class <- class %>%
  select(-varid) %>%
  gather(year, value, -var) %>%
  filter(!is.na(value)) %>%
  mutate(
    year = as.numeric(year),
    level = substr(var, 1, 2),
    item = sub(".*:", "", var)
  ) %>%
  mutate(var = sub(".*公共财政支出.*", "公共财政支出", var)) %>%
  mutate(var = sub(".*公共财政收入:税收收入.*", "公共财政收入:税收", var)) %>%
  mutate(var = sub(".*公共财政收入:非税收收入.*", "公共财政收入:非税收", var)) %>%
  mutate(item = sub("事务", "", item)) %>%
  mutate(item = sub("医疗卫生与计划生育", "医卫计生", item)) %>%
  mutate(item = sub("社会保障和就业", "社保就业", item)) %>%
  arrange(level, var, item, year)
# Share of tax revenue in (tax + non-tax) revenue, per year and level,
# from 2007 onwards. Rows whose item is "合计" are left out before summing.
inc_str <- class %>%
  filter(
    var %in% c("公共财政收入:税收", "公共财政收入:非税收"),
    item != "合计",
    year >= 2007
  ) %>%
  group_by(year, level, var) %>%
  summarise(total_inc = sum(value)) %>%
  spread(var, total_inc) %>%
  mutate(
    tax_share = `公共财政收入:税收` /
      (`公共财政收入:税收` + `公共财政收入:非税收`)
  )

# One line per level; legend below the panel without a title.
ggplot(inc_str, aes(x = year, y = tax_share, color = level)) +
  geom_line(size = 1) +
  labs(title = "各级财政税收收入占比", x = "", y = "") +
  guides(color = guide_legend(title = NULL)) +
  theme_bw() +
  theme(legend.position = "bottom")
# Composition of tax revenue in 2010 and 2015, per level.
# `share` is each item's fraction of its (year, level) total. `rk` is the
# item's descending rank by share in 2015 (0 for 2010 rows), then spread to
# both years of the same (level, item) so that only the seven largest 2015
# items are kept for both years.
tax_str <- class %>%
  filter(
    var == "公共财政收入:税收",
    item != "合计",
    year %in% c(2010, 2015)
  ) %>%
  group_by(year, level) %>%
  mutate(
    share = value / sum(value),
    rk = ifelse(year == 2015, n() + 1 - min_rank(share), 0)
  ) %>%
  select(year, level, item, share, rk) %>%
  group_by(level, item) %>%
  mutate(rk = max(rk)) %>%
  filter(rk > 0 & rk <= 7) %>%
  ungroup() %>%
  arrange(year, level, rk)
# Dodged bar chart of item shares (2010 vs 2015) for one government level.
#
# shares_tbl  - table with columns year, level, item, share and rk
# gov_level   - value of `level` to plot
# chart_title - plot title
#
# The x axis is the rank `rk`; tick labels are the 2015 item names of the
# selected level, named by their rank. Using a helper avoids repeating the
# same plot three times and avoids a global vector called `labs`, which would
# shadow ggplot2::labs(). seq_along() keeps breaks and labels the same length
# even when no rows match.
plot_tax_structure <- function(shares_tbl, gov_level, chart_title) {
  rows <- shares_tbl[shares_tbl$level == gov_level, ]
  in_2015 <- rows$year == 2015
  item_labels <- setNames(rows$item[in_2015], rows$rk[in_2015])
  ggplot(rows, aes(rk, share, fill = as.factor(year))) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(title = chart_title, x = "", y = "") +
    guides(fill = guide_legend(title = NULL)) +
    scale_x_continuous(
      breaks = seq_along(item_labels),
      labels = item_labels
    ) +
    theme_bw() +
    theme(legend.position = "bottom")
}

plot_tax_structure(tax_str, "全国", "全国财政税收收入结构")
plot_tax_structure(tax_str, "地方", "地方财政税收收入结构")
plot_tax_structure(tax_str, "中央", "中央财政税收收入结构")
# Tax revenue by industry ----
# Read the "ind" sheet: two text columns followed by five numeric columns,
# which are renamed to the years 2010-2014. `TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
ind <- read_excel(
  "Ch03_Data.xls",
  sheet = "ind",
  col_names = TRUE,
  col_types = c(rep("text", 2), rep("numeric", 5))
)
colnames(ind) <- c("varid", "var", 2010:2014)

# Keep the id/name pairs of the original series for reference.
ind_var <- ind[, c(1, 2)]

# Split the series name into `level` (text before the first ":" once the
# "税收收入:" prefix is removed) and `industry` (text after the last ":").
# When the name has no level part, both pieces are equal and the row is
# labelled "全国合计".
ind <- ind %>%
  mutate(level = sub("税收收入:", "", var)) %>%
  mutate(industry = sub(".*:", "", level)) %>%
  mutate(level = sub(":.*", "", level)) %>%
  mutate(level = ifelse(level == industry, "全国合计", level)) %>%
  select(-varid, -var)
# Industry composition of national tax revenue, 2010 vs 2014.
# The three sector rows ("第一产业", "第二产业", "第三产业") are excluded.
# `share` is each industry's fraction of the yearly total; `rk` is its
# descending rank by share in 2014, copied to the 2010 row of the same
# industry so that only the seven largest 2014 industries are kept.
ind_total <- ind %>%
  filter(
    level == "全国合计" &
      !industry %in% c("第一产业", "第二产业", "第三产业")
  ) %>%
  select(-level) %>%
  gather(year, inc, -industry) %>%
  # Drop missing values first, as the category table does: a single NA would
  # otherwise turn the yearly total, every share and every rank into NA.
  filter(!is.na(inc)) %>%
  mutate(year = as.numeric(year)) %>%
  filter(year %in% c(2010, 2014)) %>%
  group_by(year) %>%
  mutate(total = sum(inc)) %>%
  mutate(share = inc / total) %>%
  mutate(rk = ifelse(year == 2014, n() + 1 - min_rank(share), 0)) %>%
  group_by(industry) %>%
  mutate(rk = max(rk)) %>%
  filter(rk <= 7 & rk > 0) %>%
  # Return a plain, ungrouped table.
  ungroup() %>%
  arrange(year, rk)

# Tick labels: 2014 industry names, named by rank. A dedicated name is used
# so the vector does not shadow ggplot2::labs().
industry_labels <- setNames(
  ind_total$industry[ind_total$year == 2014],
  ind_total$rk[ind_total$year == 2014]
)
ggplot(ind_total, aes(rk, share, fill = as.factor(year))) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "税收收入行业结构", x = "", y = "") +
  guides(fill = guide_legend(title = NULL)) +
  scale_x_continuous(
    breaks = seq_along(industry_labels),
    labels = industry_labels
  ) +
  theme_bw() +
  theme(legend.position = "bottom")
# National vs local tax bureau revenue by industry ----
# Long tables (industry, year, value) for each bureau, without the three
# sector rows. `year` stays a character column here because it comes straight
# from the column names.
ind_guo <- ind %>%
  filter(
    level == "国家税务局" &
      !industry %in% c("第一产业", "第二产业", "第三产业")
  ) %>%
  select(-level) %>%
  gather(year, guo, -industry)
ind_di <- ind %>%
  filter(
    level == "地方税务局" &
      !industry %in% c("第一产业", "第二产业", "第三产业")
  ) %>%
  select(-level) %>%
  gather(year, di, -industry)

# Match the two bureaus on the real key columns instead of a pasted
# industry+year string, then compute national minus local revenue and put
# the 2010 and 2014 differences side by side (one row per industry).
relative <- inner_join(ind_guo, ind_di, by = c("industry", "year")) %>%
  filter(year %in% c("2010", "2014")) %>%
  mutate(diff = guo - di) %>%
  select(-guo, -di) %>%
  spread(year, diff)

# Scatter of the 2014 difference against the 2010 difference, labelled by
# industry.
ggplot(relative, aes(`2010`, `2014`)) +
  geom_point(size = 2, color = "red") +
  geom_text_repel(aes(label = industry)) +
  labs(title = "国家与地方税务局税收收入差额", x = "2010年", y = "2014年") +
  theme_bw()
# Composition of fiscal expenditure in 2010 and 2015, per level.
# `share` is each item's fraction of its (year, level) total; `rk` is the
# item's descending rank by share in 2015, copied to the 2010 row of the same
# (level, item) so that only the seven largest 2015 items are kept.
# NOTE(review): unlike the tax-revenue pipeline, rows with item "合计" are not
# filtered out here — confirm the expenditure series contain no total row.
exp_str <- class %>%
  filter(var == "公共财政支出" & year %in% c(2010, 2015)) %>%
  group_by(year, level) %>%
  mutate(total_exp = sum(value)) %>%
  mutate(share = value / total_exp) %>%
  select(year, level, item, share) %>%
  mutate(rk = ifelse(year == 2015, n() + 1 - min_rank(share), 0)) %>%
  group_by(level, item) %>%
  mutate(rk = max(rk)) %>%
  filter(rk <= 7 & rk > 0) %>%
  # Return a plain, ungrouped table, as the tax-revenue pipeline does.
  ungroup() %>%
  arrange(year, level, rk)
# Dodged bar chart of expenditure shares (2010 vs 2015) for one level.
#
# shares_tbl  - table with columns year, level, item, share and rk
# gov_level   - value of `level` to plot
# chart_title - plot title
#
# The x axis is the rank `rk`; tick labels are the 2015 item names of the
# selected level, named by their rank. Using a helper avoids repeating the
# same plot three times and avoids a global vector called `labs`, which would
# shadow ggplot2::labs(). seq_along() keeps breaks and labels the same length
# even when no rows match.
plot_exp_structure <- function(shares_tbl, gov_level, chart_title) {
  rows <- shares_tbl[shares_tbl$level == gov_level, ]
  in_2015 <- rows$year == 2015
  item_labels <- setNames(rows$item[in_2015], rows$rk[in_2015])
  ggplot(rows, aes(rk, share, fill = as.factor(year))) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(title = chart_title, x = "", y = "") +
    guides(fill = guide_legend(title = NULL)) +
    scale_x_continuous(
      breaks = seq_along(item_labels),
      labels = item_labels
    ) +
    theme_bw() +
    theme(legend.position = "bottom")
}

plot_exp_structure(exp_str, "全国", "全国财政支出结构")
plot_exp_structure(exp_str, "地方", "地方财政支出结构")
plot_exp_structure(exp_str, "中央", "中央财政支出结构")