@fanxy
2020-10-23T15:03:19.000000Z
字数 6359
阅读 8922
樊潇彦
复旦大学经济学院
数量软件
资料下载:
Kabacoff, R.I. 著:《R语言实战(第2版)》,王小宁等译,人民邮电出版社,2016
RiA2_Code.zip-491.4kB
软件安装:
工作界面
Run
等命令键Ctrl+L
清空下载数据并解压到工作目录:
20201009.rar
#---------- 1. 准备工作 ------------------------------------
setwd("D:\\...") # 设置工作目录,数据也存在该目录下
library(tidyverse)
# library(dplyr)
# library(tidyr)
# library(ggplot2)
library(readxl)
#---------- 2. GDP与增长率 ---------------------------------
data=read.csv("gdp.csv")
str(data)
gdp=data%>%
rename(year=Sgnyea,GDP=Gdp0101)%>%arrange(year)%>%
mutate(gr= GDP/lag(GDP)-1)%>%
select(year,GDP,gr)%>%
gather(var,value,-year)
ggplot(gdp,aes(year,value))+
geom_line(size=1)+ # 做线图,宽度为1
facet_wrap(~var,scales="free")+ # 分面
geom_vline(xintercept=c(1978,2001,2008), colour="black", linetype="dotted")+ # 加纵线
labs(title="",x="",y="")+ # 图名与纵横坐标名称
scale_x_continuous(breaks=seq(1952,2017,by=13))+
theme_bw()+ # 黑白底
theme(legend.position="bottom",
strip.text= element_text(size=12), # 分面字号,纵横分面用element_text.x()和element_text.y()
axis.text.x = element_text(size = 11), # 横轴字号
axis.text.y = element_text(size = 11)) # 纵轴字号
#---------- 3. 产业结构 ---------------------------------
share=data.frame(var=paste("sh",1:3,sep=""), # 指标英文名称
var_cn=factor(c("第一产业","第二产业","第三产业"), # 指标中文名称
levels=c("第一产业","第二产业","第三产业"),
ordered=T),
stringsAsFactors = F)
gdp_sh=data%>%
rename(year=Sgnyea,gdp=Gdp0101)%>%
mutate(sh1=Gdp0102/gdp,sh2=Gdp0103/gdp,sh3=Gdp0106/gdp)%>%
select(year,sh1:sh3)%>%
gather(var,share,-year)%>%
left_join(share,by="var")%>%
arrange(year,var_cn)
ggplot(gdp_sh,aes(year,share,color=var_cn))+
geom_line(size=1)+
labs(title="历年GDP产业结构",x="",y="")+
scale_colour_manual(values=c("green","red","blue"))+ # 设定线条颜色
scale_x_continuous(breaks=seq(1952,2017,by=5))+
scale_y_continuous(limits=c(0.05,0.55), breaks=seq(0.05,0.55,by=0.1),
labels = scales::percent)+ # 纵轴为百分比
guides(color=guide_legend(title=NULL))+ # 去掉颜色标签的title
theme_bw()+
theme(legend.position="bottom", # 颜色标签置于底部
legend.text=element_text(size=12), # 标签字号12
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11))
#---------- 4. 城乡居民收入 ---------------------------------
ruub = read_excel("CME_Consmp3.xls") # 读取数据
colnames(ruub)=c("year","inc_ru","inc_ub","incid_ru","incid_ub","engel_ru","engel_ub") # 改变量名
ruub= ruub%>%
select(-incid_ru,-incid_ub)%>% # 去掉 incid_ru incid_ub 两个指标
gather(var,value,-year, na.rm=T)%>% # 将数据变为 year var value 三列
mutate(region=sub(".*_","",var),
var=sub("_.*","",var)) # 生成 region = ru, ub 和 var = inc, engel
# 在一张图中画出城乡居民人均收入的线图
ggplot(ruub%>%filter(var=="inc"), aes(year,value,color=region))+geom_line()
# 在一张图中画出城乡居民恩格尔曲线的线图
ggplot(ruub%>%filter(var=="engel"), aes(year,value,color=region))+geom_line()
# 按 var 分面作图,比较城乡居民人均收入和恩格尔曲线
ggplot(ruub, aes(year,value, color=region))+geom_line()+
facet_wrap(~var,scales="free")
下载数据:20201016.rar
#---------- 0. 准备工作 ---------------------------------------
setwd("D:\\...")
install.packages("ggrepel") # geom_text_repel命令,自动调整标记文本
library(tidyverse)
library(readxl)
library(ggrepel)
#---------- 1. 国家信息 ---------------------------------
countryinfo=read_excel("countryinfo.xlsx")
country6=c("China","Japan","United Kingdom","Germany","France","United States")
#---------- 2. BIS实际汇率指数 ---------------------------------
# BIS "Effective exchange rate indices (monthly)"
# https://www.bis.org/statistics/full_data_sets.htm
re_ori=read.csv("BISWEB_EERDATAFLOW_csv_col.csv")
# colnames(re_ori)[689] # 截至日期2020.08
str(re_ori[,1:8])
table(re_ori[,3])
re=re_ori%>%
filter(EER_TYPE=="R", EER_BASKET=="B")%>%
rename(iso2=REF_AREA,country=Reference.area)%>%
select(iso2,country,X1964.01:X2020.08)%>%
gather(time,re,-iso2,-country, na.rm =T)%>%
mutate(year=as.numeric(substring(time,2,5)))%>%
group_by(iso2,country,year)%>%
summarise(re=mean(re))%>%
group_by()%>%
left_join(countryinfo,by="iso2")
g_re=re%>%
filter(country %in% country6)%>%
mutate(label=ifelse(year==2020,country_cn,""))
ggplot(g_re,aes(year,re,color=country_cn))+geom_line(size=1)+
geom_text_repel(aes(label=label))+
labs(title="",x="",y="")+guides(linetype=guide_legend(NULL))+
scale_x_continuous(breaks = seq(1994,2020,2))+ # 纵轴连续
theme_bw()+
theme(legend.position="non",
strip.text= element_text(size=11), # 分面字号,纵横分面用element_text.x()和element_text.y()
axis.text.x = element_text(size = 11), # 横轴字号
axis.text.y = element_text(size = 11)) # 纵轴字号
summary(re)
data_re=re%>%spread(year,re)%>% # 生成回归所用数据
mutate(grre=`2017`/`1994`-1)%>% # PWT数据截至到2017年
select(iso3,grre)
#---------- 3. PWT实际人均GDP ---------------------------------
# Penn World Table version 9.1
# https://www.rug.nl/ggdc/productivity/pwt/
pwt=read_excel("pwt91.xlsx",sheet="Data")
str(pwt)
pwt=pwt%>%
mutate(rgdppa=rgdpe/pop)%>%filter(!is.na(rgdppa))%>%
rename(iso3=countrycode)%>%
select(year,iso3,country,rgdppa)%>%
left_join(countryinfo,by="iso3")
g_pwt=pwt%>%
filter(country %in% country6)%>%
mutate(label=ifelse(year==1992,country_cn,""))
ggplot(g_pwt,aes(year,rgdppa,color=country_cn))+geom_line(size=1)+
geom_text_repel(aes(label=label))+
labs(title="",x="",y="")+guides(linetype=guide_legend(NULL))+
scale_x_continuous(breaks = seq(1952,2017,5))+ # 纵轴连续,中国数据从1952年开始
theme_bw()+
theme(legend.position="non",
strip.text= element_text(size=11),# 分面字号,纵横分面用element_text.x()和element_text.y()
axis.text.x = element_text(size = 11), # 横轴字号
axis.text.y = element_text(size = 11)) # 纵轴字号
data_pwt=pwt%>%filter(year>=1994)%>%
spread(year,rgdppa)%>%
mutate(grgdppa=`2017`/`1994`-1)%>% # 汇率数据中国从1994年开始
select(iso3,country_UN,country_cn,grgdppa)
#---------- 4. 合并数据,检验BS效应 ---------------------------------
data=merge(data_re,data_pwt,by="iso3",all=T)%>%
filter(!is.na(grre) & !is.na(grgdppa))%>%
arrange(country_cn)%>%
select(iso3,country_cn,country_UN,grre,grgdppa)
ggplot(data,aes(grgdppa,grre))+geom_point()+geom_smooth(method="lm")+
geom_text_repel(aes(label=country_cn))+
labs(title="检验巴拉萨-萨缪尔森效应(1994-2017)",x="实际人均GDP增长",y="实际汇率指数增长")+
theme_bw()+
theme(strip.text= element_text(size=11),# 分面字号,纵横分面用element_text.x()和element_text.y()
axis.text.x = element_text(size = 11), # 横轴字号
axis.text.y = element_text(size = 11)) # 纵轴字号
regbs=lm(grre~grgdppa,data)
summary(regbs)