[关闭]
@fanxy 2018-04-02T18:45:05.000000Z 字数 3548 阅读 2474

R 语言常用命令

R语言 常用命令


1. 数据读取

1.1 读取网页

  1. library(httr)
  2. library(curl)
  3. library(rvest)
  4. tryCatch(read_html(url), error=function(err) "Error 404")!="Error 404" # 保证网页存在
  5. page = read_html(url, encoding="GBK") # 读取网页内容,指定编码为gbk
  6. page=as(page, "character")
  7. write.csv(page,file="page.csv")
  8. page=data.frame(var=readLines("page.csv"),stringsAsFactors = F)
  9. abc=data.frame(var=str_subset(page$var, "abc")) # 提取包含abc的行

1.2 读取文本

  1. data=read.csv("a.txt",sep="\t",encoding="UTF-8",fill=T,stringsAsFactors = F,quote = "")
  2. data=read.table("a.txt",encoding="UTF-8",stringsAsFactors=FALSE,sep="\t",quote="")
  3. data=data.frame(V1=readLines("a.txt",encoding="UTF-8"),stringsAsFactors = F) # 逐行读,含中文
  4. # 给表格Ta加表头
  5. colnames(Ta)=colnames(read.table("clipboard",sep="\t",header=T))

1.3 批量下载

  1. links="http://.."
  2. page <- links %>% read_html()
  3. page=as(page, "character")
  4. write.csv(page,file="page.csv")
  5. page=data.frame(var=readLines("page.csv"),stringsAsFactors = F)
  6. filelist=data.frame(var=str_subset(page$var, "<a href=\"\""),stringsAsFactors = F)%>%
  7. mutate(link=sub(".*<a href=\"\"","",var))%>%
  8. mutate(link=sub("\"\">.*","",link))
  9. filelist=str_subset(filelist$link, "[.]") # 多种文件格式,.pdf, .dta等
  10. for (i in seq_along(filelist)){ # filelist 此时为字符向量,按元素个数循环
  11. filename=paste(links,filelist[i],sep="")
  12. filename=sub("amp;","",filename)
  13. destfile=paste("D:\\...", #目标文件夹
  14. filelist[i],sep="")
  15. download.file(filename,destfile=destfile,mode="wb")
  16. }

2. 数据处理

2.1 字符处理

  1. 正则表达式:http://www.cnblogs.com/zxin/archive/2013/01/26/2877765.html
  2. # 大小写转换
  3. library(stringr) #加载包
  4. dog <- "The quick brown dog"
  5. str_to_upper(string = dog) #转换成大写 "THE QUICK BROWN DOG"
  6. str_to_lower(string = dog) #转换成小写 "the quick brown dog"
  7. str_to_title(string = dog) #单词首字母转换大写 "The Quick Brown Dog"
  8. gsub("[:a-z:]","",dog) #去掉所有小写字母
  9. gsub("[:A-Z:]","",dog) #去掉所有大写字母
  1. # ABC 包含几个编号(如一篇文章的JEL、Author等),将其分解,存为名为ABC的变量
  2. list=strsplit(data$ABC,";") # 将ABC用;分拆为列表,每个元素为字符向量
  3. names(list)=data$idno # 用idno命名各元素(如文章编号)
  4. ABC=data.frame(idno=rep(names(list),sapply(list,length)),
  5. ABC=unlist(list),stringsAsFactors = F) # 将列表转换为数据框

3. 作图

  1. + geom_abline(slope = 1)+ # 加对角线
  2. geom_text_repel(aes(label=label))+ # library(ggrepel),点的文本分开
  3. labs(title="",x="",y="")+
  4. scale_x_continuous(breaks = c(1997, 2003, 2008, 2014))+
  5. guides(fill=guide_legend(title=NULL))+ theme_bw()+ theme(legend.position="none")

4. 运算

4.1 矩阵重复

  1. kronecker(matrix(1, M, N), A) # 矩阵A行方向复制M次、列方向复制N次
  2. kronecker(diag(1, N), A) # 以A为对角元素的对角阵

4.2 符号计算

  1. # 微分
  2. expFun=expression(a*x^2) # 表达式
  3. expFun
  4. a=0.5; x=2; eval(expFun) # 求表达式在a=0.5, x=2处的值
  5. Fun=as.function(alist(x=, a=0.5, a*x^2)) # 函数f(x;a=0.5)
  6. Fun
  7. Fun(x=2) # 求函数值f(2)
  8. D1x=D(expFun, "x") # 对表达式中x求一阶偏导
  9. D1x
  10. D1a=D(expFun, "a")
  11. data.frame(x,a,expFun=as.character(expFun),
  12. D1x=eval(D1x), D1a=eval(D1a))
  13. DD <- function(expr, name, order){ # 写函数,循环求高阶导数
  14. if (order < 1)
  15. stop("'order' must be >= 1")
  16. if (order == 1)
  17. D(expr, name) else DD(D(expr, name), name, order - 1)}
  18. DD(expFun, "x", 2) # 调用函数求2阶导
  19. DD(expFun, "x", 3)
  20. NormDensity <- expression(1/sqrt(2 * pi) * exp(-x^2/2)) # 正态密度函数
  21. class(NormDensity)
  22. D(NormDensity, "x") # 求一阶导数
  23. deriv(NormDensity, "x")
  24. deriv3(NormDensity, "x")
  25. DD(NormDensity, "x", 3)
  26. # 积分
  27. library(Ryacas)
  28. yacas(expression(integrate(1/x, x)))
  29. yacas("Integrate(x)1/x")
  30. x <- Sym("x"); Integrate(1/x, x)
  31. yacas("Integrate(t)a*t")
  32. Simplify(yacas("Integrate(t)a*t"))

5. 老鼠4*4方格,概率

老鼠4*4方格,概率

  1. a=matrix(c(0.6, 0.5, 0, 0.2, 0.5, 1/3, 0, 0.5, 1/3),3,byrow=T)
  2. old=rep(1,3)
  3. for (i in 1:5){
  4. new=a %*% old
  5. N=t(new)%*%c(4,8,4)
  6. print(data.frame(new, old))
  7. print(N)
  8. old=new
  9. }
  10. library(expm)
  11. print(round(rowSums(a%^%10),3))
  12. b=matrix(c(0.6,0.4,0,0.25,0.5,0.25,0,2/3,1/3),3,byrow=T)
  13. print(round(colSums(c(4,8,4)%*%b%^%10),3))
  14. (b%^%10)[2,]*16/c(4,8,4)

参考文献

  1. 黄湘云:R语言做符号计算
  2. K. Soetaert and T. Petzoldt: CRAN Task View: Differential Equations
  3. R.H. Shumway and D.S. Stoffer, 2016: Time Series Analysis and Its Applications: With R Examples(4th ed.), Springer
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注