@fanxy
2020-10-23T07:03:19.000000Z
字数 6359
阅读 9343
樊潇彦 复旦大学经济学院 数量软件
资料下载:
Kabacoff, R.I. 著:《R语言实战(第2版)》,王小宁等译,人民邮电出版社,2016
RiA2_Code.zip-491.4kB
软件安装:
工作界面
Run 等命令键;Ctrl+L 清空控制台。下载数据并解压到工作目录:
20201009.rar
#---------- 1. Setup ------------------------------------
# Set the working directory; the data files are read from this directory.
setwd("D:\\...")
library(tidyverse)
# library(dplyr)
# library(tidyr)
# library(ggplot2)
library(readxl)

#---------- 2. GDP and growth rate ---------------------------------
data <- read.csv("gdp.csv")
str(data)

# Long table (year, var, value) holding the GDP level and its growth rate.
gdp <- data %>%
  rename(year = Sgnyea, GDP = Gdp0101) %>%
  arrange(year) %>%                      # lag() below relies on year order
  mutate(gr = GDP / lag(GDP) - 1) %>%    # growth relative to the previous row
  select(year, GDP, gr) %>%
  gather(var, value, -year)

ggplot(gdp, aes(year, value)) +
  geom_line(size = 1) +                  # line chart, width 1
  facet_wrap(~var, scales = "free") +    # one panel per variable
  geom_vline(
    xintercept = c(1978, 2001, 2008),
    colour = "black",
    linetype = "dotted"
  ) +                                    # vertical reference lines
  labs(title = "", x = "", y = "") +     # plot title and axis titles
  scale_x_continuous(breaks = seq(1952, 2017, by = 13)) +
  theme_bw() +                           # black-and-white theme
  theme(
    legend.position = "bottom",
    # Facet label size; use strip.text.x / strip.text.y to style horizontal
    # and vertical facet labels separately.
    strip.text = element_text(size = 12),
    axis.text.x = element_text(size = 11),   # x-axis tick label size
    axis.text.y = element_text(size = 11)    # y-axis tick label size
  )

#---------- 3. Industry structure ---------------------------------
# Lookup table mapping the share variable names to ordered display labels.
share <- data.frame(
  var = paste0("sh", 1:3),               # variable names sh1..sh3
  var_cn = factor(
    c("第一产业", "第二产业", "第三产业"),   # display labels
    levels = c("第一产业", "第二产业", "第三产业"),
    ordered = TRUE
  ),
  stringsAsFactors = FALSE
)

# Long table (year, var, share, var_cn) of each component's share in GDP.
gdp_sh <- data %>%
  rename(year = Sgnyea, gdp = Gdp0101) %>%
  mutate(
    sh1 = Gdp0102 / gdp,
    sh2 = Gdp0103 / gdp,
    sh3 = Gdp0106 / gdp
  ) %>%
  select(year, sh1:sh3) %>%
  gather(var, share, -year) %>%
  left_join(share, by = "var") %>%
  arrange(year, var_cn)

ggplot(gdp_sh, aes(year, share, color = var_cn)) +
  geom_line(size = 1) +
  labs(title = "历年GDP产业结构", x = "", y = "") +
  scale_colour_manual(values = c("green", "red", "blue")) +  # line colours
  scale_x_continuous(breaks = seq(1952, 2017, by = 5)) +
  scale_y_continuous(
    limits = c(0.05, 0.55),
    breaks = seq(0.05, 0.55, by = 0.1),
    labels = scales::percent
  ) +                                          # y axis shown as percentages
  guides(color = guide_legend(title = NULL)) + # drop the colour legend title
  theme_bw() +
  theme(
    legend.position = "bottom",                # legend at the bottom
    legend.text = element_text(size = 12),     # legend text size 12
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
  )

#---------- 4. Urban and rural household income ---------------------------------
ruub <- read_excel("CME_Consmp3.xls")          # read the data
colnames(ruub) <- c(
  "year", "inc_ru", "inc_ub", "incid_ru", "incid_ub", "engel_ru", "engel_ub"
)                                              # rename the columns

ruub <- ruub %>%
  select(-incid_ru, -incid_ub) %>%             # drop the two incid_* columns
  gather(var, value, -year, na.rm = TRUE) %>%  # reshape to year / var / value
  mutate(
    # region must be derived first: it reads the suffix of the original var
    # before var itself is overwritten with the prefix.
    region = sub(".*_", "", var),              # region = ru, ub
    var = sub("_.*", "", var)                  # var = inc, engel
  )

# Urban vs rural per-capita income in one plot.
ggplot(ruub %>% filter(var == "inc"), aes(year, value, color = region)) +
  geom_line()

# Urban vs rural Engel series in one plot.
ggplot(ruub %>% filter(var == "engel"), aes(year, value, color = region)) +
  geom_line()

# Facet by var to compare per-capita income and the Engel series.
ggplot(ruub, aes(year, value, color = region)) +
  geom_line() +
  facet_wrap(~var, scales = "free")
下载数据:20201016.rar
#---------- 0. Setup ---------------------------------------
setwd("D:\\...")
# ggrepel provides geom_text_repel(), which repositions text labels
# automatically. Install it only when it is missing instead of on every run.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library(tidyverse)
library(readxl)
library(ggrepel)

#---------- 1. Country information ---------------------------------
countryinfo <- read_excel("countryinfo.xlsx")
country6 <- c(
  "China", "Japan", "United Kingdom", "Germany", "France", "United States"
)

#---------- 2. BIS real exchange rate index ---------------------------------
# BIS "Effective exchange rate indices (monthly)"
# https://www.bis.org/statistics/full_data_sets.htm
re_ori <- read.csv("BISWEB_EERDATAFLOW_csv_col.csv")
# colnames(re_ori)[689] # data cut-off 2020.08
str(re_ori[, 1:8])
table(re_ori[, 3])

# Annual mean of the monthly index per country, joined with country info.
re <- re_ori %>%
  filter(EER_TYPE == "R", EER_BASKET == "B") %>%
  rename(iso2 = REF_AREA, country = Reference.area) %>%
  select(iso2, country, X1964.01:X2020.08) %>%
  gather(time, re, -iso2, -country, na.rm = TRUE) %>%
  # Column names look like "X1964.01"; characters 2-5 are the year.
  mutate(year = as.numeric(substring(time, 2, 5))) %>%
  group_by(iso2, country, year) %>%
  summarise(re = mean(re)) %>%
  ungroup() %>%
  left_join(countryinfo, by = "iso2")

# Label each line once, at the last year, instead of using a legend.
g_re <- re %>%
  filter(country %in% country6) %>%
  mutate(label = ifelse(year == 2020, country_cn, ""))

ggplot(g_re, aes(year, re, color = country_cn)) +
  geom_line(size = 1) +
  geom_text_repel(aes(label = label)) +
  labs(title = "", x = "", y = "") +
  scale_x_continuous(breaks = seq(1994, 2020, 2)) +  # continuous x axis
  theme_bw() +
  theme(
    legend.position = "none",                # lines are labelled directly
    # Facet label size; use strip.text.x / strip.text.y to style horizontal
    # and vertical facet labels separately.
    strip.text = element_text(size = 11),
    axis.text.x = element_text(size = 11),   # x-axis tick label size
    axis.text.y = element_text(size = 11)    # y-axis tick label size
  )

summary(re)

# Regression input: growth of the index between 1994 and 2017.
data_re <- re %>%
  spread(year, re) %>%
  mutate(grre = `2017` / `1994` - 1) %>%     # PWT data end in 2017
  select(iso3, grre)

#---------- 3. PWT real GDP per capita ---------------------------------
# Penn World Table version 9.1
# https://www.rug.nl/ggdc/productivity/pwt/
pwt <- read_excel("pwt91.xlsx", sheet = "Data")
str(pwt)

pwt <- pwt %>%
  mutate(rgdppa = rgdpe / pop) %>%
  filter(!is.na(rgdppa)) %>%
  rename(iso3 = countrycode) %>%
  select(year, iso3, country, rgdppa) %>%
  left_join(countryinfo, by = "iso3")

# Label each line once, at 1992, instead of using a legend.
g_pwt <- pwt %>%
  filter(country %in% country6) %>%
  mutate(label = ifelse(year == 1992, country_cn, ""))

ggplot(g_pwt, aes(year, rgdppa, color = country_cn)) +
  geom_line(size = 1) +
  geom_text_repel(aes(label = label)) +
  labs(title = "", x = "", y = "") +
  # Continuous x axis; the China series starts in 1952.
  scale_x_continuous(breaks = seq(1952, 2017, 5)) +
  theme_bw() +
  theme(
    legend.position = "none",                # lines are labelled directly
    strip.text = element_text(size = 11),    # facet label size
    axis.text.x = element_text(size = 11),   # x-axis tick label size
    axis.text.y = element_text(size = 11)    # y-axis tick label size
  )

# Regression input: growth of real GDP per capita between 1994 and 2017.
data_pwt <- pwt %>%
  filter(year >= 1994) %>%
  spread(year, rgdppa) %>%
  # The exchange rate data for China start in 1994.
  mutate(grgdppa = `2017` / `1994` - 1) %>%
  select(iso3, country_UN, country_cn, grgdppa)

#---------- 4. Merge the data and test the BS effect ---------------------------------
# Full outer merge, then keep only countries with both growth rates present.
data <- merge(data_re, data_pwt, by = "iso3", all = TRUE) %>%
  filter(!is.na(grre) & !is.na(grgdppa)) %>%
  arrange(country_cn) %>%
  select(iso3, country_cn, country_UN, grre, grgdppa)

ggplot(data, aes(grgdppa, grre)) +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_text_repel(aes(label = country_cn)) +
  labs(
    title = "检验巴拉萨-萨缪尔森效应(1994-2017)",
    x = "实际人均GDP增长",
    y = "实际汇率指数增长"
  ) +
  theme_bw() +
  theme(
    strip.text = element_text(size = 11),    # facet label size
    axis.text.x = element_text(size = 11),   # x-axis tick label size
    axis.text.y = element_text(size = 11)    # y-axis tick label size
  )

# OLS of exchange rate index growth on real GDP per capita growth.
regbs <- lm(grre ~ grgdppa, data)
summary(regbs)