@fanxy
2020-03-15T16:52:43.000000Z
字数 8850
阅读 6090
樊潇彦
复旦大学经济学院
金融数据
下载 Ch03.rar,解压缩后存于工作目录下。
setwd("D:\\...\\Ch03") # set the working directory; write the path separator as / or \\
rm(list=ls()) # remove every object from the current workspace
## Attach previously installed packages that are needed now
library(tidyverse)
library(readstata13)
library(haven)
library(readxl)
## Install the packages used in this section (only those that are missing),
## then attach them.
## install.packages() is deliberately not called unconditionally: doing so
## re-downloads every package on each run and attempts to overwrite ggplot2
## while it is already attached by library(tidyverse).
## local() keeps the helper variables out of the global workspace.
local({
  pkgs <- c("ggplot2", "ggvis", "shiny", "dygraphs")
  installed <- vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
  if (!all(installed)) {
    install.packages(pkgs[!installed])
  }
})
library(ggplot2)
library(ggvis)
library(shiny)
library(dygraphs)
# Base graphics with plot(): points, lines, scatter plots, histogram-like plots, etc.
data(iris)
plot(iris$Sepal.Length) # one variable, drawn as points against its index
plot(iris$Sepal.Length,type="l") # one variable, drawn as a line
plot(x=iris$Sepal.Length, y=iris$Petal.Length) # scatter plot of two variables
plot(x=iris$Sepal.Length, y=iris$Petal.Length, type="h") # two variables, drawn as histogram-like vertical lines
plot(x=iris$Species, y=iris$Petal.Length, type="h") # continuous variable grouped by a discrete (factor) variable: box plot
# Bar charts with barplot(): simple returns computed from consecutive MSFT prices
msft <- c(26.85, 27.41, 28.21, 32.64, 34.66, 34.30, 31.62, 33.40)
# Each price divided by the one before it, minus 1
msft.returns <- tail(msft, -1) / head(msft, -1) - 1
names(msft.returns) <- month.abb[seq_along(msft.returns)]
barplot(msft.returns, col = "blue")
# Same bars, labelled with full month names drawn perpendicular to the axis
barplot(
  msft.returns,
  names.arg = month.name[seq_along(msft.returns)],
  col = "blue",
  las = 2
)
# Pie chart with pie()
x <-1:5;pie(x,col=rainbow(5))
# Draw a box around the current plot
box()
# Sunflower plot with sunflowerplot()
sunflowerplot(iris[,3:4])
# Pairwise scatter plots of a matrix or data frame with pairs()
data(iris)
# The point fill colour (bg) is picked per observation from its Species code
pairs(iris[1:4], main = "Anderson's Iris Data -- 3 species",pch = 21,
bg = c("red", "green3", "blue")[unclass(iris$Species)])
# Plot several series in one figure with matplot()
set.seed(1)
# Three series, each the cumulative sum of 50 standard-normal draws
x <- cumsum(rnorm(50))
y <- cumsum(rnorm(50))
z <- cumsum(rnorm(50))
matplot(cbind(x,y,z),col=2:4,type="l",lty=1, xlab="", ylab="")
# Legend using the same colours and line type as the series above
legend("bottom",legend=c("x","y","z"),
lty=1,col=2:4,bty="n")
# Draw the curve of a given function over a given range with curve()
curve(sin, -2*pi, 2*pi, xname = "t")
# Example of graphical parameters and a legend:
# MSFT and INTC returns drawn as side-by-side bars
intc <- c(20.42, 20.48, 21.43, 23.50, 24.04, 24.00, 23.11, 21.98)
# Each price divided by the one before it, minus 1
intc.returns <- tail(intc, -1) / head(intc, -1) - 1
barplot(
  rbind(msft.returns, intc.returns),
  beside = TRUE,
  col = c(2, 4)
)
legend(
  "topleft",
  legend = c("MSFT", "INTC"),
  pch = 15,
  col = c(2, 4),
  bty = "n"
)
# Open a plotting window, draw in it, then save the result with savePlot()
# NOTE(review): windows() and savePlot() are only available in R on Windows - confirm the target platform
windows()
plot(1:10)
rect(1, 5, 3, 7, col="blue")
savePlot("test01", type="jpg",device=dev.cur(),restoreConsole=TRUE)
# Draw directly on a jpeg device; dev.off() then closes the device, which finishes writing the file
jpeg(file="myplot.jpeg")
plot(1:10)
rect(1, 5, 3, 7, col="blue")
dev.off()
Basic Components of a ggplot2 Plot
library(ggplot2)
data(diamonds) # load the diamonds data set
set.seed(42) # fix the RNG seed, then draw a random sample of 1000 rows
small <- diamonds[sample(nrow(diamonds), 1000), ]
head(small)
# Create an empty plot and inspect the components of a ggplot object
p <- ggplot()
class(p)
names(p)
# Data and aesthetic mappings
# Example: how carat, cut and clarity relate to the price of a diamond
ggplot(small, aes(x = carat, y = price, color = cut, shape = clarity)) +
  geom_point()
# Geoms and stats
ggplot(small, aes(x = price, colour = cut)) +
  geom_density()
# Facets and labels
ggplot(small, aes(x = carat, y = price, color = color)) +
  geom_point() +
  facet_wrap(~cut) + # several panels on one page, one per cut
  labs(title = "Diamonds", x = "Carat", y = "Price") + # title and axis labels
  theme_bw()
1.主要命令
library(ggvis)
library(dplyr)
data(mtcars)
# Scatter plot with a line and a smoothed fit: points, lines, smooths
mtcars %>%
ggvis(~wt, ~mpg) %>%
layer_points(fill= ~factor(cyl))%>% # besides fill, you can also set stroke (border), size, shape and opacity
layer_lines(stroke:= "gray") %>%
layer_smooths()
# Lines and bars: lines, bars
data(pressure)
pressure %>%
ggvis(~temperature, ~pressure) %>%
layer_lines(stroke := "red") %>%
layer_bars(width=20, fill:=NA)
2.统计作图
# Boxplots: mpg grouped by number of cylinders
mtcars %>%
ggvis(~factor(cyl), ~mpg) %>%
layer_boxplots()
# Histograms
# NOTE(review): `cocaine` is not created in this file; presumably the data set shipped with ggvis - confirm
cocaine %>%
ggvis(~potency) %>%
layer_histograms(width = 10, center = 0, fill := "pink") %>%
add_axis("x", title = "potency") %>%
add_axis("y", title = "histograms")
# Densities: same variable, drawn as an unfilled red density curve
cocaine %>%
ggvis(~potency) %>%
layer_densities(fill := NA, stroke := "red") %>%
add_axis("x", title = "potency") %>%
add_axis("y", title = "densities")
3.回归预测
# Points with a smoother, plus linear-model ("lm") predictions in red with se = TRUE
mtcars %>%
ggvis(~wt, ~mpg) %>%
layer_points() %>%
layer_smooths() %>%
layer_model_predictions(stroke := "red", model = "lm", se = TRUE)
# Points filled by cyl, with linear-model predictions computed per group
mtcars %>%
ggvis(~wt, ~mpg, fill = ~factor(cyl)) %>%
layer_points() %>%
group_by(cyl) %>% # one regression per cyl group
layer_model_predictions(model = "lm")
4.交互式作图
# 1) Continuous adjustment with sliders
mtcars %>%
ggvis(~wt, ~mpg) %>%
layer_points(size := input_slider(10, 100, value = 30))%>% # slider for the point size
layer_smooths(span = input_slider(0.5, 1, value = 1)) # slider for the span (window width) of the smoother
# 2) Input controls: a slider for the bandwidth adjustment and a select box for the kernel
mtcars %>%
ggvis(x = ~wt) %>%
layer_densities(
adjust = input_slider(.1, 2, value = 1, step = .1, label = "Bandwidth adjustment"),
kernel = input_select(c("Gaussian" = "gaussian",
"Epanechnikov" = "epanechnikov",
"Rectangular" = "rectangular",
"Triangular" = "triangular",
"Biweight" = "biweight",
"Cosine" = "cosine",
"Optcosine" = "optcosine"),
label = "Kernel"))
# 3) Dynamic plot
dat <- data.frame(time = 1:10, value = runif(10))
# Reactive data source: invalidateLater(2000, NULL) requests re-evaluation after 2000 ms.
# Each evaluation drops the first row's values, appends the next time index and a new
# random value, and writes the result back to the global `dat` via <<-.
ddat <- reactive({invalidateLater(2000, NULL)
dat$time <<- c(dat$time[-1], dat$time[length(dat$time)] + 1)
dat$value <<- c(dat$value[-1], runif(1))
dat })
ddat %>%
ggvis(x = ~time, y = ~value, key := ~time) %>%
layer_points() %>%
layer_paths()
library(dygraphs)
# dyAnnotation: annotations attached to dates
dygraph(presidents, main = "Presidential Approval") %>%
dyAxis("y", valueRange = c(0, 100)) %>%
dyAnnotation("1950-7-1", text = "A", tooltip = "Korea") %>%
dyAnnotation("1965-1-1", text = "B", tooltip = "Vietnam")
# dyAxis: axis label and value range
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dyAxis("y", label = "Temp (F)", valueRange = c(40, 60)) %>%
dyOptions(axisLineWidth = 1.5, fillGraph = TRUE, drawGrid = FALSE)
# dyEvent: events marked at given dates
dygraph(presidents, main = "Presidential Approval") %>%
dyAxis("y", valueRange = c(0, 100)) %>%
dyEvent("1950-6-30", "Korea", labelLoc = "bottom") %>%
dyEvent("1965-2-09", "Vietnam", labelLoc = "bottom")
# dyHighlight: highlighting of the series under the mouse
lungDeaths <- cbind(ldeaths, mdeaths, fdeaths)
dygraph(lungDeaths, main = "Deaths from Lung Disease (UK)") %>%
dyHighlight(highlightCircleSize = 5,
highlightSeriesBackgroundAlpha = 0.2,
hideOnMouseOut = FALSE)
# dyLegend: legend
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dySeries("V1", label = "Temperature (F)") %>%
dyLegend(show = "always", hideOnMouseOut = FALSE)
# dyLimit: limit line, here placed at the maximum of the series (NAs ignored)
dygraph(presidents, main = "Presidential Approval") %>%
dyAxis("y", valueRange = c(0, 100)) %>%
dyLimit(max(presidents, na.rm = TRUE), "Max",
strokePattern = "solid", color = "blue")
# dyOptions: options
# lungDeaths is the three-series object built with cbind() earlier in this script
dygraph(lungDeaths) %>% dyRangeSelector()
# Relabelled series drawn as a stacked graph, with a range selector of height 20
dygraph(lungDeaths) %>%
dySeries("mdeaths", label = "Male") %>%
dySeries("fdeaths", label = "Female") %>%
dyOptions(stackedGraph = TRUE) %>%
dyRangeSelector(height = 20)
# Holt-Winters fit of ldeaths, then predictions 72 periods ahead with prediction intervals
hw <- HoltWinters(ldeaths)
predicted <- predict(hw, n.ahead = 72, prediction.interval = TRUE)
# The lwr/fit/upr columns are passed together to dySeries() under the single label "Deaths"
dygraph(predicted, main = "Predicted Lung Deaths (UK)") %>%
dyAxis("x", drawGrid = FALSE) %>%
dySeries(c("lwr", "fit", "upr"), label = "Deaths") %>%
dyOptions(colors = RColorBrewer::brewer.pal(3, "Set1"))
# dyRangeSelector: time-range selector
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dyRangeSelector()
# Same selector with an initial date window
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dyRangeSelector(dateWindow = c("1920-01-01", "1960-01-01"))
# Same selector with a custom height and an empty stroke colour
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dyRangeSelector(height = 20, strokeColor = "")
# dyRoller: rolling average (smoothing)
# Y values are averaged over the specified number of time scale units.
dygraph(discoveries, main = "Important Discoveries") %>%
dyRoller(rollPeriod = 5)
# dyShading: shaded regions
# Shading between two dates on the x axis (the default axis)
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dyShading(from = "1920-1-1", to = "1930-1-1") %>%
dyShading(from = "1940-1-1", to = "1950-1-1")
# Shading between two values on the y axis
dygraph(nhtemp, main = "New Haven Temperatures") %>%
dyShading(from = "48", to = "52", axis = "y") %>%
dyShading(from = "50", to = "50.1", axis = "y", color = "black")
介绍变量赋值、分支结构、循环结构、函数使用、获取帮助等知识。
# Variable assignment
x <- 1.5
cat("x = ",x,"\n",sep="") # print to the console; also handy for testing a program
# Numeric, character and logical vectors built with c()
y1 <- c(1.5,2.3,8.6,7.4,9.2)
y2 <- c("MSFT","GOOG","AAPL")
y3 <- c(T,F,T,T,F,F)
3.1415926 -> z; # right assignment: value on the left, variable name on the right (rarely used)
assign("t",1.414) # assign a value to a variable with assign()
# if
a <- 1
if(a==1) print("a==1")
a <- 2
if(a > 1) print("a > 1") else print("a <= 1")
a <- 3
if( a == 1){
a # the value of a is not displayed
print("I am a boy!")
}else{ # several statements must be wrapped in {}; else must directly follow the closing }
print(a) # the value of a is displayed
print("I am a girl!")
}
# 1) if - else if
a <- 4
if( a == 1)
{
print("a == 1")
}else if( a == 2) # again, every else must be attached directly to the preceding }
{
print("a == 2")
}else
{
print("Not 1 & 2")
}
# 2) ifelse() evaluates the logical expression in its first argument: if the result
#    is TRUE it returns the second argument, otherwise the third
a <- 2
ifelse(a > 1,3.1416,1.414)
# 3) Multi-way branching with switch(); a is 2 here, so the second alternative is evaluated
switch(a,
print("选项1"),
print("选项2"),
print("选项3"),
print("选项4"),
print("选项5")
)
# 1) for: add up the integers 1..100
iTotal <- 0
for (i in 1:100) { # the keyword `in` walks through every integer of the vector
  iTotal <- iTotal + i
}
cat("1-100的累加和为:", iTotal, "\n", sep = "")
szSymbols <- c("MSFT", "GOOG", "AAPL", "INTL", "ORCL", "SYMC")
for (SymbolName in szSymbols) { # character vectors can be iterated the same way
  cat(SymbolName, "\n", sep = "")
}
# 2) while: the same sum, with an explicit counter
i <- 1
iTotal <- 0
while (i <= 100) {
  iTotal <- iTotal + i
  i <- i + 1
}
cat("1-100的累加和为:", iTotal, "\n", sep = "") # print the result to the console
# 3) repeat: the same sum, leaving the loop explicitly
i <- 1
iTotal <- 0
repeat { # unconditional loop: the body itself has to arrange the exit
  iTotal <- iTotal + i
  i <- i + 1
  if (i > 100) break else next # note how break and next are used
}
cat("1-100的累加和为:", iTotal, "\n", sep = "")
# Small functions can be defined inline and called directly.
# Example: the n-th power of a square matrix.
#
# mat_power(A, n)
#   A: a square matrix.
#   n: a single non-negative whole number, the exponent.
#   Returns A %*% A %*% ... %*% A (n factors); A itself for n = 1 and the
#   identity matrix of matching size for n = 0.
#   Stops with an error if n is not a single non-negative whole number.
mat_power <- function(A, n) {
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n),
    n >= 0, n == floor(n)
  )
  if (n == 0) {
    return(diag(nrow = NROW(A)))
  }
  Apower <- A
  # seq_len(n - 1) is empty for n = 1, whereas 2:n would count down (2, 1)
  # and multiply twice too often.
  for (i in seq_len(n - 1)) {
    Apower <- Apower %*% A
  }
  Apower
}
# Try the function on a 2 x 2 matrix and compare with the explicit product
A = matrix(c(1:4),2)
mat_power(A, 3)
A %*% A %*% A
# Larger functions should be saved in a separate .r file and loaded from there.
rm("mat_power")
# NOTE(review): myfun.r is not shown here; it is presumably expected to define mat_power - confirm
source("myfun.r") # load the user-written script
mat_power(A, 3)
?print # open the help page (shown in the right-hand pane of RStudio)
example(print) # run the examples for the command
# NOTE(review): quantmod is not attached anywhere in this file - confirm it is installed
?quantmod # open the package-level help
# NOTE(review): the pattern is a regular expression, so "print*" matches "prin" followed by zero or more "t"
apropos("print*") # list everything on the search path whose name matches the regular expression
demo(graphics)
# If you are unsure of the exact name of a package or command, search Google or http://rseek.org/.