@fanxy
2020-03-25T20:19:10.000000Z
字数 8813
阅读 1180
樊潇彦
复旦大学经济学院
金融数据
下载数据:Ex1.rar
setwd("D:\\...\\课堂练习1")
rm(list=ls())
library(tidyverse)
library(readxl) # 读取excel数据
library(stringr) # 字符串处理
library(ggplot2)
library(zoo)
CAMP模型设定为:
在回归检验之前,我们要将 camp.xlsx
中的数据合并为一张数据表,其中:
rf=read_excel('camp.xlsx',sheet="无风险利率")
str(rf)
rf=rf%>%
mutate(Trdmnt=as.yearmon(Clsdt))%>%
group_by(Trdmnt)%>%summarise(rf=mean(Nrrmtdt))%>%group_by() # 月度均值
rm=read_excel("camp.xlsx",sheet="市场收益率")
str(rm)
rm_rf=rm%>%
mutate(Trdmnt=as.yearmon(Trdmnt))%>%
left_join(rf,by="Trdmnt")%>% # 合并数据
mutate(id=paste(Trdmnt,Markettype,sep="-")) # 标识每个市场每个月的回报率
r=read_excel("camp.xlsx",sheet="个股收益率")
str(r)
capm=r%>%
mutate(Trdmnt=as.yearmon(Trdmnt),
id=paste(Trdmnt,Markettype,sep="-"))%>%
select(-Trdmnt,-Markettype)%>%
left_join(rm_rf,by="id")%>% # 个股所在市场该月的回报率
mutate(r_ex=Mretwd-rf, # 计算个股超额回报
rm_exeq=Mretwdeq-rf, # 分别计算三种市场超额回报
rm_exos=Mretwdos-rf,
rm_extl=Mretwdtl-rf)%>%
select(Trdmnt,Stkcd,Markettype,contains("_ex"))
str(capm)
ggplot(capm%>%mutate(Trdmnt=as.Date(Trdmnt)),
aes(Trdmnt,r_ex,color="r_ex"))+
geom_line()+
geom_line(aes(Trdmnt,rm_exos,color="rm_exos"))+
facet_wrap(~Stkcd)+
theme_bw()
打开联合国数据 gdp.xlsx
,包括以下指标:
我们要分析中国与日、法、德、英、美相比,在过去48年中的GDP表现,需要作图的指标包括:
1. GDP总量(单位:万亿美元);
2. 净出口额(单位:亿美元);
3. 广义第二产业(采矿、制造和公用事业)占GDP的比重;
4. GDP占全球的份额。
gdp=read_excel("gdp.xlsx",skip=2)
country=data.frame(CountryID=c(156,392,250,276,826,840),
cou_cn=c("1 中国","3 日本","5 法国","4 德国","6 英国","2 美国"),
stringsAsFactors = F)
gdp=gdp%>%
right_join(country,by="CountryID")%>%
filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
"Exports of goods and services",
"Imports of goods and services",
"Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
select(cou_cn,IndicatorName,'1970':'2018')%>%
gather(year,value,-IndicatorName,-cou_cn)%>%
spread(IndicatorName,value)%>%
rename(GDP='Gross Domestic Product (GDP)',
EX='Exports of goods and services',
IM='Imports of goods and services',
MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%
mutate(year=as.numeric(year),
'GDP(单位:万亿美元)'=GDP/10^12,
'净出口额(单位:亿美元)'=(EX-IM)/10^8,
'采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%
group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%
select(cou_cn,year,'GDP(单位:万亿美元)','净出口额(单位:亿美元)','采矿、制造和公用事业占GDP的比重','GDP的全球份额')%>%
gather(var,value,-year,-cou_cn)
# 作图
ggplot(gdp,aes(year,value,color=cou_cn))+
geom_line(size=1)+
facet_wrap(~var,scales="free")+
labs(title="",x="",y="")+
guides(linetype=guide_legend(NULL))+
geom_vline(xintercept=c(1978,1992,2001,2008),linetype=2, color="darkgrey")+
scale_x_continuous(breaks = seq(1970,2018,6))+
guides(color=guide_legend(title=NULL))+theme_bw()+
theme(legend.position="bottom",
legend.text=element_text(size=11),
axis.text=element_text(size=9))
i=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv",stringsAsFactors = F)
coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),
cou_cn=c("1 中国","2 日本","3 德国","4 英国", "5 美国"))
library(lubridate)
i5=i%>%select(-FREQ, -Frequency, -Time.Period)%>%
gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%
mutate(time=gsub("X","",time),
time=gsub("[.]","-",time),
time=as.yearmon(time))%>%
filter(Reference.area %in% coulist$Reference.area)%>%
left_join(coulist,by='Reference.area')%>%
arrange(cou_cn,time)%>%
select(time, i, cou_cn)%>%
mutate(time=as.Date(time),year=year(time))%>%
group_by(year,cou_cn)%>%summarise(i=mean(i))%>%group_by()
data=gdp%>%
filter(var=="GDP(单位:万亿美元)")%>%select(-var)%>%
rename(gdp=value)%>%left_join(i5,by=c("cou_cn","year"))%>%
gather(var,value,-cou_cn,-year,na.rm=T)
ggplot(data,aes(year,value,color=cou_cn))+
geom_line(size=1)+
facet_wrap(~var,scales="free")+
labs(title="",x="",y="")+
guides(linetype=guide_legend(NULL))+
geom_vline(xintercept=c(1978,1992,2001,2008),linetype=2, color="darkgrey")+
scale_x_continuous(breaks = seq(1970,2018,6))+
guides(color=guide_legend(title=NULL))+theme_bw()+
theme(legend.position="bottom",
legend.text=element_text(size=11),
axis.text=element_text(size=9))
gdp=read_excel("gdp.xlsx",skip=2)
country=data.frame(CountryID=c(156,392,276,826,840),
cou_cn=c("1 中国","2 日本","3 德国","4 英国","5 美国"),
stringsAsFactors = F)
gdp=gdp%>%
right_join(country,by="CountryID")%>%
filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
"Exports of goods and services",
"Imports of goods and services",
"Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
select(cou_cn,IndicatorName,'1970':'2018')%>%
gather(year,value,-IndicatorName,-cou_cn)%>%
spread(IndicatorName,value)%>%
rename(GDP='Gross Domestic Product (GDP)',
EX='Exports of goods and services',
IM='Imports of goods and services',
MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%
mutate(year=as.numeric(year),
'GDP(单位:万亿美元)'=GDP/10^12,
'净出口额(单位:亿美元)'=(EX-IM)/10^8,
'采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%
group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%
select(cou_cn,year,'GDP(单位:万亿美元)')%>%
gather(var,value,-year,-cou_cn)%>%
select(year, cou_cn, value)%>%
rename(GDP='value')
i=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv")
coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),
coun_cn=c("1 中国","2 日本","3 德国","4 英国", "5 美国"))
i5=i%>%select(-FREQ, -Frequency, -Time.Period)%>%
gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%
mutate(time=gsub("X","",time),
time=gsub("[.]","-",time),
time=as.yearmon(time))%>%
filter(Reference.area %in% coulist$Reference.area)%>%
left_join(coulist,by="Reference.area")%>%
arrange(coun_cn,time)%>%
mutate(year=floor(as.numeric(time)))%>%
group_by(year, coun_cn)%>%summarise('i'=mean(i))%>%group_by()%>%
select(year, coun_cn, i) %>%
arrange(coun_cn,year) %>%
rename(cou_cn='coun_cn')
total = gdp %>% left_join(i5, by=c('cou_cn', 'year')) %>%
select('year', 'cou_cn', 'i', 'GDP')%>%
gather(var,value,-year,-cou_cn)
ggplot(total,
aes(year,value,color=cou_cn))+
geom_line()+
facet_wrap(~var)+
theme_bw()
gdp=read_excel("gdp.xlsx",skip=2)
country=data.frame(CountryID=c(156,392,276,826,840),
coun_cn=c("1 中国","2 日本","3 欧元区","4 英国","5 美国"),
stringsAsFactors = F)
gdp=gdp%>%
right_join(country,by="CountryID")%>%
filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
"Exports of goods and services",
"Imports of goods and services",
"Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
select(coun_cn,IndicatorName,'1970':'2018')%>%
gather(year,value,-IndicatorName,-coun_cn)%>%
spread(IndicatorName,value)%>%
rename(GDP='Gross Domestic Product (GDP)',
EX='Exports of goods and services',
IM='Imports of goods and services',
MAN='Mining, Manufacturing, Utilities (ISIC C-E)') %>%
select(-EX,-IM,-MAN)
coulist=data.frame(Reference.area=c("China","Japan","Euro area","United Kingdom","United States"),
coun_cn=c("1 中国","2 日本","3 欧元区","4 英国","5 美国"))
i <- read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv", header=T, skip=0)
i5 = i%>%select(-FREQ, -Frequency, -Time.Period)%>%
gather(time,i,-REF_AREA,-Reference.area, na.rm=T)%>%
mutate(time=gsub("X","",time),
time=gsub("[.]","-",time),
time=as.yearmon(time))%>%
filter(Reference.area%in%coulist$Reference.area)%>%
left_join(coulist,by="Reference.area")%>%
arrange(coun_cn,time) %>%
mutate(year=year(time),)%>%
group_by(year,coun_cn)%>%mutate(i_year=i/sum(i))%>%ungroup()%>%
select(coun_cn, year, i_year)
gdp$year <- as.numeric(gdp$year)
i5 <- i5%>%left_join(gdp,by=c("year","coun_cn"))
ggplot(i5, aes(time,i_year,GDP))+geom_line()
gdp=read_excel("gdp.xlsx",skip=2)
country=data.frame(CountryID=c(156,392,250,276,826,840),
cou_cn=c("1 中国","3 日本","5 法国","4 德国","6 英国","2 美国"),
stringsAsFactors = F)
gdp1=gdp%>%
right_join(country,by="CountryID")%>%
filter(IndicatorName %in% c("Gross Domestic Product (GDP)",
"Exports of goods and services",
"Imports of goods and services",
"Mining, Manufacturing, Utilities (ISIC C-E)"))%>%
select(cou_cn,IndicatorName,'1970':'2018')%>% #如果指标名里面数字开头或者
gather(year,value,-IndicatorName,-cou_cn)%>%
spread(IndicatorName,value)%>%
rename(GDP='Gross Domestic Product (GDP)',
EX='Exports of goods and services',
IM='Imports of goods and services',
MAN='Mining, Manufacturing, Utilities (ISIC C-E)')%>%
mutate(year=as.numeric(year),
'GDP(单位:万亿美元)'=GDP/10^12,
'净出口额(单位:亿美元)'=(EX-IM)/10^8,
'采矿、制造和公用事业占GDP的比重'=MAN/GDP)%>%
group_by(year)%>%mutate('GDP的全球份额'=GDP/sum(GDP))%>%group_by()%>%
select(cou_cn,year,'GDP(单位:万亿美元)','净出口额(单位:亿美元)','采矿、制造和公用事业占GDP的比重','GDP的全球份额')%>%
gather(var,value,-year,-cou_cn)
data=read.csv("WEBSTATS_CBPOL_M_DATAFLOW_csv_col.csv")
coulist=data.frame(Reference.area=c("China","United States","Japan","Euro area","United Kingdom"),
cou_cn=c("1 中国","2 美国","3 日本","4 德国","6 英国"))
i=data %>%
select(-FREQ,-Frequency,-Time.Period) %>%
gather(time, i, -REF_AREA,-Reference.area, na.rm =T) %>%
mutate(time=gsub("X","",time),time=gsub("[.]","-",time),time=as.yearmon(time))%>%
filter(Reference.area %in% coulist$Reference.area)%>%
left_join(coulist,by="Reference.area")%>%
mutate(time=as.Date(time),year=year(time))%>%
group_by(year,cou_cn)%>%summarise(i=mean(c(year,cou_cn)))%>%group_by()
gdp2=gdp1%>%
filter(var %in% "GDP(单位:万亿美元)")%>%
spread(var,value)
i_gdp=gdp2%>%
left_join(i,by=c("cou_cn",'year'))%>%
arrange(cou_cn,time)