@fanxy
2018-04-02T18:45:05.000000Z
字数 3548
阅读 2501
R语言
常用命令
library(httr)
library(curl)
library(rvest)
# Check that the page exists: read_html() signals an error for a URL it
# cannot fetch (e.g. HTTP 404); the handler turns that error into FALSE.
tryCatch(
  inherits(read_html(url), "xml_document"),
  error = function(err) FALSE
)
# Read the page, declaring its encoding as GBK
page <- read_html(url, encoding = "GBK")
# Convert the parsed document to text, then round-trip it through a CSV file
# so that it can be read back one line per data-frame row
page <- as(page, "character")
write.csv(page, file = "page.csv")
page <- data.frame(var = readLines("page.csv"), stringsAsFactors = FALSE)
# Keep only the lines containing "abc"; str_subset is namespace-qualified
# because stringr is not attached at this point in the file
abc <- data.frame(var = stringr::str_subset(page$var, "abc"))
# Three alternative ways to load the tab-separated, UTF-8 encoded file a.txt;
# each one assigns the result to `data`.

# 1. read.csv with quoting disabled and short rows padded (fill = TRUE)
data <- read.csv(
  "a.txt",
  sep = "\t", encoding = "UTF-8", fill = TRUE,
  stringsAsFactors = FALSE, quote = ""
)

# 2. read.table with the same separator and quoting settings
data <- read.table(
  "a.txt",
  encoding = "UTF-8", stringsAsFactors = FALSE, sep = "\t", quote = ""
)

# 3. Read line by line into a one-column data frame (for text with Chinese)
data <- data.frame(
  V1 = readLines("a.txt", encoding = "UTF-8"),
  stringsAsFactors = FALSE
)
# Give table Ta a header: read the tab-separated table currently on the
# clipboard (first row treated as header) and copy its column names onto Ta
clipboard_tbl <- read.table("clipboard", sep = "\t", header = TRUE)
colnames(Ta) <- colnames(clipboard_tbl)
# Download every file linked from an index page.
links <- "http://.."
page <- read_html(links)
# Convert the parsed page to text and round-trip it through a CSV file so it
# can be read back one line per row. write.csv() doubles embedded quotes,
# which is why the patterns below look for href="" rather than href=".
page <- as(page, "character")
write.csv(page, file = "page.csv")
page <- data.frame(var = readLines("page.csv"), stringsAsFactors = FALSE)

# Keep the lines holding an anchor tag and cut out the link target:
# drop everything up to and including <a href="" and everything from "">
anchor_lines <- stringr::str_subset(page$var, "<a href=\"\"")
filelist <- data.frame(var = anchor_lines, stringsAsFactors = FALSE)
filelist$link <- sub(".*<a href=\"\"", "", filelist$var)
filelist$link <- sub("\"\">.*", "", filelist$link)

# Keep targets containing a dot (several file formats: .pdf, .dta, ...).
# From here on filelist is a character vector, not a data frame.
filelist <- stringr::str_subset(filelist$link, "[.]")

# seq_along() is required: nrow() of a character vector is NULL, and it also
# makes the loop a no-op when no links were found
for (i in seq_along(filelist)) {
  filename <- paste0(links, filelist[i])
  # Undo the HTML escaping of every ampersand in the URL
  filename <- gsub("&amp;", "&", filename, fixed = TRUE)
  destfile <- paste0(
    "D:\\...", # target folder
    filelist[i]
  )
  download.file(filename, destfile = destfile, mode = "wb")
}
正则表达式:http://www.cnblogs.com/zxin/archive/2013/01/26/2877765.html
# Case conversion
library(stringr) # load the package
dog <- "The quick brown dog"
str_to_upper(string = dog) # to upper case: "THE QUICK BROWN DOG"
str_to_lower(string = dog) # to lower case: "the quick brown dog"
str_to_title(string = dog) # capitalise each word: "The Quick Brown Dog"
# Plain bracket ranges: the former "[:a-z:]" form is not a POSIX class and
# additionally matched the ":" character
gsub("[a-z]", "", dog) # remove all lower-case letters
gsub("[A-Z]", "", dog) # remove all upper-case letters
# data$ABC holds several codes per record separated by ";" (e.g. the JEL
# codes or authors of an article). Split them and build a long data frame
# named ABC with one row per (idno, code) pair.
abc_parts <- strsplit(data$ABC, ";") # list: one character vector per record
names(abc_parts) <- data$idno # name each element by its idno (e.g. article id)
ABC <- data.frame(
  # repeat each idno once per code; lengths() also copes with an empty list
  idno = rep(names(abc_parts), lengths(abc_parts)),
  ABC = unlist(abc_parts),
  stringsAsFactors = FALSE
) # flatten the list into a data frame
# Fragment of ggplot2 layers, meant to be appended to an existing plot object
+ geom_abline(slope = 1) + # add the diagonal line
  geom_text_repel(aes(label = label)) + # needs library(ggrepel); spreads the point labels apart
  labs(title = "", x = "", y = "") +
  scale_x_continuous(breaks = c(1997, 2003, 2008, 2014)) +
  guides(fill = guide_legend(title = NULL)) + # no title on the fill legend
  theme_bw() +
  theme(legend.position = "none") # "none" is the valid keyword for hiding the legend
# %x% is the operator form of kronecker()
matrix(1, M, N) %x% A # tile A: M copies down the rows, N copies across the columns
diag(1, N) %x% A # block-diagonal matrix with N copies of A on the diagonal
# Differentiation
expFun <- expression(a * x^2) # an unevaluated expression in a and x
expFun
a <- 0.5
x <- 2
eval(expFun) # value of the expression at a = 0.5, x = 2
Fun <- as.function(alist(x = , a = 0.5, a * x^2)) # function f(x; a = 0.5)
Fun
Fun(x = 2) # function value f(2)
D1x <- D(expFun, "x") # first partial derivative with respect to x
D1x
D1a <- D(expFun, "a") # first partial derivative with respect to a
# One-row summary: the point (x, a), the expression as text, and both
# partial derivatives evaluated at that point
data.frame(
  x, a,
  expFun = as.character(expFun),
  D1x = eval(D1x),
  D1a = eval(D1a)
)
# Higher-order derivative: differentiate `expr` with respect to the variable
# `name` a total of `order` times by applying D() repeatedly.
#   expr  - expression or call to differentiate
#   name  - character string naming the variable
#   order - derivative order; anything below 1 raises an error
# Returns the differentiated expression produced by D().
DD <- function(expr, name, order) {
  repeat {
    if (order < 1) {
      stop("'order' must be >= 1")
    }
    if (order == 1) {
      return(D(expr, name))
    }
    # Peel off one order of differentiation and go round again
    expr <- D(expr, name)
    order <- order - 1
  }
}
# Use DD for higher-order derivatives of the expression a * x^2
DD(expFun, "x", 2) # second derivative
DD(expFun, "x", 3) # third derivative

# Standard normal density as an expression in x
NormDensity <- expression(1 / sqrt(2 * pi) * exp(-x^2 / 2))
class(NormDensity)
D(NormDensity, "x") # first derivative as an unevaluated call
deriv(NormDensity, "x") # expression that evaluates value and gradient
deriv(NormDensity, "x", hessian = TRUE) # same as deriv3(): also the Hessian
DD(NormDensity, "x", 3) # third derivative
# Integration (symbolic, through the yacas computer algebra system)
library(Ryacas)
# Integrate 1/x with respect to x, written as an R expression ...
yacas(expression(integrate(1 / x, x)))
# ... and as a yacas command string
yacas("Integrate(x)1/x")
# Same integral again using a symbolic variable
x <- Sym("x")
Integrate(1 / x, x)
# Integrate a*t with respect to t, then simplify the result
yacas("Integrate(t)a*t")
Simplify(yacas("Integrate(t)a*t"))
老鼠4*4方格,概率
# 3 x 3 coefficient matrix, entered row by row
a <- matrix(
  c(
    0.6, 0.5, 0,
    0.2, 0.5, 1 / 3,
    0, 0.5, 1 / 3
  ),
  nrow = 3, byrow = TRUE
)
# Start from a vector of ones and apply the matrix five times, printing the
# new and previous vectors side by side plus the weighted total each round
old <- rep(1, 3)
for (i in seq_len(5)) {
  new <- a %*% old
  N <- t(new) %*% c(4, 8, 4) # total of the new vector with weights 4, 8, 4
  print(data.frame(new, old))
  print(N)
  old <- new
}
library(expm) # provides the matrix power operator %^%
# Row sums of the 10th power of a, i.e. ten applications of a to a vector of ones
print(round(rowSums(a %^% 10), 3))
# 3 x 3 matrix entered row by row; each row sums to 1
b <- matrix(c(0.6, 0.4, 0, 0.25, 0.5, 0.25, 0, 2 / 3, 1 / 3), 3, byrow = TRUE)
# %*% and %^% share one precedence level and associate left to right, so the
# power must be parenthesised; otherwise the 1 x 3 product c(4, 8, 4) %*% b
# would be raised to the 10th power, which fails for a non-square matrix
print(round(colSums(c(4, 8, 4) %*% (b %^% 10)), 3))
# Second row of b^10, rescaled by 16 / c(4, 8, 4)
(b %^% 10)[2, ] * 16 / c(4, 8, 4)