@jeffjade
2016-10-10T23:04:14.000000Z
字数 6977
阅读 1524
Node.js
Node.js 遍历文件夹,获取文件夹下所有文件列表
var fs = require('fs');
// Directory to scan, taken from the first command-line argument.
var root_path = process.argv[2],
    // Name of the file that receives the generated listing.
    w_file = 'res.lst';
/**
 * Recursively collects every non-directory entry below `root`.
 *
 * Each result is the entry's path with the leading `base` part replaced by
 * ".", e.g. "./sub/a.txt". Entries are inspected with lstat, so a symbolic
 * link is listed as a file and is never followed.
 *
 * @param {string} root    Directory to scan.
 * @param {string} [base]  Leading part of `root` to replace with "."; defaults
 *                         to `root` itself, i.e. the directory of the outermost call.
 * @returns {string[]} Relative paths of all files found, in readdir order.
 * @throws {Error} Whatever fs.readdirSync / fs.lstatSync throw (missing directory, permissions, ...).
 */
function getAllFiles(root, base) {
  // The prefix used to come from the module-level root_path, which made the
  // function usable for that single directory only. It is now carried through
  // the recursion instead.
  var prefix = base === undefined ? root : base;
  var res = [];
  fs.readdirSync(root).forEach(function(file) {
    var pathname = root + '/' + file;
    if (fs.lstatSync(pathname).isDirectory()) {
      // Append in place rather than re-allocating the array with concat().
      Array.prototype.push.apply(res, getAllFiles(pathname, prefix));
    } else {
      // pathname always starts with prefix because it is built from it.
      res.push('.' + pathname.slice(prefix.length));
    }
  });
  return res;
}
// Newline-separated listing of every file below root_path.
var w_content = getAllFiles(root_path).join('\n');

// The listing is stored inside the scanned directory. The separator is added
// explicitly: root_path + w_file only formed a valid path when the argument
// happened to end with a slash.
var w_path = root_path + '/' + w_file;

// fs.writeFile creates the file when it is missing and truncates it otherwise,
// so no existence probe is needed. The former readFile + errno check compared
// against an obsolete errno value and used a legacy octal literal that strict
// mode rejects; its two branches also wrote to different locations.
fs.writeFile(w_path, w_content, 'utf8', function(e) {
  if (e) throw e;
});
参见: http://www.cnblogs.com/litao229/archive/2012/01/04/2312393.html
Node.js 按行读取文件
var fs = require('fs'),
readline = require('readline');
// Source stream for ./test.txt, consumed line by line through readline below.
var input = fs.createReadStream('./test.txt');

// A missing or unreadable file surfaces as an 'error' event on the stream;
// with no listener attached Node turns it into an uncaught exception.
input.on('error', function(err) {
  console.error('Cannot read ./test.txt: ' + err.message);
});

var rd = readline.createInterface({
  input: input,
  output: process.stdout,
  terminal: false
});

// 1-based number of the line currently being reported.
var i = 1;
rd.on('line', function(line) {
  console.log('第'+ i +'行:'+ line);
  // `write` is not defined anywhere in this snippet, so calling it blindly
  // throws a ReferenceError on the first line. Forward the line only when the
  // surrounding program actually provides such a function.
  if (typeof write === 'function') {
    write(line);
  }
  i++;
});
Node.js 过滤单个页面中的不合法链接:
var http = require("http"),
fs = require('fs'),
cheerio = require("cheerio");
// var root_path=process.argv[2];
// Page whose links are harvested.
var url = "http://www.jeffjade.com/2016/03/30/104-front-end-tutorial/",
    // Collected href values; starts out as an empty plain object.
    saveLinkArr = {};
/**
 * Downloads `url` with http.get and hands the response body to `callback`.
 *
 * @param {string} url         Address to fetch.
 * @param {function(?string)} callback  Invoked exactly once: with the body
 *        decoded as UTF-8 text once the response ends, or with null when the
 *        request or the response stream fails.
 */
function download(url, callback) {
  var finished = false;

  // Request-level and response-level failures can both fire; report only once.
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  function fail(err) {
    console.error('download failed: ' + url + ' (' + err.message + ')');
    finish(null);
  }

  http.get(url, function(res) {
    var data = "";
    // Decode inside the stream so a multi-byte character that straddles two
    // chunks is reassembled; appending raw Buffers decodes each chunk alone
    // and corrupts such characters.
    res.setEncoding('utf8');
    res.on('data', function(chunk) {
      data += chunk;
    });
    res.on("end", function() {
      finish(data);
    });
    res.on("error", fail);
  }).on("error", fail);
}
/**
 * Serialises `resData` as JSON with 2-space indentation and writes it to
 * ./jeffJadeLinkData.json.
 *
 * @param {*} resData  Value to serialise with JSON.stringify.
 * @throws {Error} The write error is re-thrown from the asynchronous
 *         fs.writeFile callback, so it surfaces as an uncaught exception.
 */
function writeOut(resData) {
  // The payload is JSON and the link checker in this file loads it with
  // require('./jeffJadeLinkData.json'); the previous ".js" name never matched.
  var filepath = './jeffJadeLinkData.json';
  var resJson = JSON.stringify(resData, null, 2);
  fs.writeFile(filepath, resJson, function(e) {
    if (e) throw e;
  });
}
// Fetch the page, record the href of every anchor found inside a <tbody>
// under its match index, then hand the collection to writeOut.
download(url, function(html) {
  if (!html) {
    console.log("download error !");
    return;
  }
  var $ = cheerio.load(html);
  $("tbody a").each(function(index, anchor) {
    var href = $(anchor).attr("href");
    console.log(index, href);
    saveLinkArr[index] = href;
  });
  writeOut(saveLinkArr);
  console.log("done");
});
// ==================================================
var http = require('http'),
parse = require('url').parse;
/**
 * Issues an HTTP GET for `url` through http.request, guarded by two 5-second
 * timers: one until the response callback fires, one while the body is read.
 *
 * Callback invocations visible in this block: callback(Error) when either
 * timer expires or the request emits 'error', and callback(Error, res) when
 * the response emits 'aborted'. The success path and the response 'error'
 * handler were elided by the author, so how a completed body is reported
 * cannot be seen here.
 *
 * @param {string} url         Parsed with url.parse; hostname, port (default 80),
 *                             pathname and query string are taken from it.
 * @param {Function} callback  Completion handler; see the invocations above.
 */
function urlget(url, callback) {
var info = parse(url),
// Request target: pathname plus the query string, when there is one.
path = info.pathname + (info.search || ''),
options = {
host: info.hostname,
port: info.port || 80,
path: path,
method: 'GET'
};
var req = null, request_timeout = null;
// Abort and report when no response has started within 5 seconds.
request_timeout = setTimeout(function() {
request_timeout = null;
req.abort();
callback(new Error('Request timeout'));
}, 5000);
req = http.request(options, function(res) {
clearTimeout(request_timeout);
var chunks = [], length = 0, response_timeout = null;
// Abort and report when the body has not finished 5 seconds after the response started.
response_timeout = setTimeout(function() {
// Nulling the handle marks the timer as fired; the 'end' and 'aborted' handlers test it.
response_timeout = null;
req.abort();
callback(new Error('Response timeout'));
}, 5000);
res.on('data', function(chunk) {
// Keep every chunk together with the running byte total.
length += chunk.length;
chunks.push(chunk);
}).on('end', function() {
if(response_timeout) {
// node 0.5.x and later: req.abort() triggers the response's 'end' event
clearTimeout(response_timeout);
// NOTE(review): the Buffer constructor is deprecated in current Node.js;
// Buffer.alloc / Buffer.concat are the documented replacements.
var data = new Buffer(length);
// ... identical code omitted here ...
}
}).on('error', function(err) {
// ... identical code omitted here ...
}).on('aborted', function() {
if(response_timeout) {
// node 0.5.x and later: while res is valid,
// req.abort() triggers the response's 'aborted' event
callback(new Error('Response aborted'), res);
}
});
}).on('error', function(err) {
// NOTE(review): this reports unconditionally. If req.abort() from one of the
// timers above also raises an 'error' on the request, callback would run
// twice for a single request -- confirm and guard if so.
callback(err);
});
req.end();
};
// https://fengmk2.com/ppt/those-things-using-nodejs/#slide-10
===============================
var http = require('http'),
parse = require('url').parse,
fs = require('fs'),
needFilterList = require('./jeffJadeLinkData.json');
/**
 * Issues an HTTP GET for `url` through http.request and reports failures as
 * callback(url, Error[, res]). Two 5-second timers guard the request: one
 * until the response callback fires, one while the body is read.
 *
 * In the code visible here the callback is invoked only on failure paths
 * (either timer expiring, response 'error', response 'aborted'). A
 * request-level 'error' is merely logged, and the 'end' handler was partly
 * elided by the author, so no success notification can be seen.
 *
 * @param {string} url         Parsed with url.parse; hostname, port (default 80),
 *                             pathname and query string are taken from it.
 * @param {Function} callback  Failure handler, called as described above.
 */
function requestUrl(url, callback) {
var info = parse(url),
// Request target: pathname plus the query string, when there is one.
path = info.pathname + (info.search || ''),
options = {
host: info.hostname,
port: info.port || 80,
path: path,
method: 'GET'
};
var req = null,
request_timeout = null;
// Abort and report when no response has started within 5 seconds.
request_timeout = setTimeout(function() {
request_timeout = null;
req.abort();
callback(url, new Error('Request timeout'));
}, 5000);
req = http.request(options, function(res) {
clearTimeout(request_timeout);
var chunks = [],
length = 0,
response_timeout = null;
// Abort and report when the body has not finished 5 seconds after the response started.
response_timeout = setTimeout(function() {
// Nulling the handle marks the timer as fired; the 'end' and 'aborted' handlers test it.
response_timeout = null;
req.abort();
callback(url, new Error('Response timeout'));
}, 5000);
res.on('data', function(chunk) {
// Keep every chunk together with the running byte total.
length += chunk.length;
chunks.push(chunk);
}).on('end', function() {
if (response_timeout) {
// node 0.5.x and later: req.abort() triggers the response's 'end' event
clearTimeout(response_timeout);
// NOTE(review): the Buffer constructor is deprecated in current Node.js;
// Buffer.alloc / Buffer.concat are the documented replacements.
var data = new Buffer(length);
// ... identical code omitted here ...
}
}).on('error', function(err) {
// ... identical code omitted here ...
callback(url, err);
}).on('aborted', function() {
if (response_timeout) {
// node 0.5.x and later: while res is valid,
// req.abort() triggers the response's 'aborted' event
callback(url, new Error('Response aborted'), res);
}
});
}).on('error', function(err) {
// NOTE(review): request-level failures are only logged here; the URL is not
// passed to callback, and request_timeout is left running -- confirm this is intended.
console.log('what? error ? \n' + url)
// callback(url, err);
});
req.end();
};
/*===============================================================Read Line: */
// URLs reported as failing so far, in reporting order.
var errUrlList = [];

/**
 * Logs a failing URL, appends it to errUrlList and passes the whole list to writeOut.
 *
 * @param {string} errUrl   URL that failed.
 * @param {*} [errInfo]     Accepted for the caller's convenience; not used.
 * @param {*} [errRes]      Accepted for the caller's convenience; not used.
 */
function callback(errUrl, errInfo, errRes) {
  var message = "err url: " + errUrl;
  console.log(message);
  errUrlList[errUrlList.length] = errUrl;
  writeOut(errUrlList);
}
/**
 * Walks every enumerable key of needFilterList, logs the link stored under it
 * and starts a requestUrl check for that link with `callback` as the handler.
 */
function jadeFilterLink() {
  var key, link;
  for (key in needFilterList) {
    link = needFilterList[key];
    console.log("now turn:" + link);
    requestUrl(link, callback);
  }
}
/**
 * Serialises `resData` as JSON with 2-space indentation and stores it in
 * ./err_url_list.json, replacing any previous content.
 *
 * @param {*} resData  Value to serialise with JSON.stringify.
 * @throws {Error} Whatever fs.writeFileSync throws when the file cannot be written.
 */
function writeOut(resData) {
  var filepath = './err_url_list.json';
  var resJson = JSON.stringify(resData, null, 2);
  // This runs once per failing URL while other requests are still in flight.
  // Issuing overlapping fs.writeFile calls against one path is documented as
  // unsafe, so write synchronously: every snapshot lands whole and in call order.
  fs.writeFileSync(filepath, resJson);
}
// Entry point of this snippet: start the link check.
jadeFilterLink()
var http = require("http"),
    https = require("https"),
    fs = require('fs'),
    cheerio = require("cheerio"),
    parse = require('url').parse;
// var root_path=process.argv[2];
// Page whose links are harvested and then checked.
var url = "http://www.jeffjade.com/2016/03/30/104-front-end-tutorial/";
// Collected href values; starts out as an empty plain object.
var saveLinkArr = {};
// Failure report lines; starts out empty.
var errUrlList = [];
/**
 * Downloads `url` with http.get and hands the response body to `callback`.
 *
 * @param {string} url         Address to fetch.
 * @param {function(?string)} callback  Invoked exactly once: with the body
 *        decoded as UTF-8 text once the response ends, or with null when the
 *        request or the response stream fails.
 */
function download(url, callback) {
  var settled = false;

  // Request-level and response-level failures can both fire; report only once.
  function settle(result) {
    if (settled) {
      return;
    }
    settled = true;
    callback(result);
  }

  function onFailure(err) {
    console.error('download failed: ' + url + ' (' + err.message + ')');
    settle(null);
  }

  http.get(url, function(res) {
    var data = "";
    // Let the stream do the decoding: a multi-byte character split across two
    // chunks is then reassembled, whereas appending raw Buffers decodes every
    // chunk on its own and garbles such characters.
    res.setEncoding('utf8');
    res.on('data', function(chunk) {
      data += chunk;
    });
    res.on("end", function() {
      settle(data);
    });
    res.on("error", onFailure);
  }).on("error", onFailure);
}
// Number of links harvested from the page; set once the download completes.
// Declared here because assigning to an undeclared name throws in strict mode.
var linkCount = 0;

// Fetch the page, collect the href of every anchor inside a <tbody>, then
// start checking the collected links.
download(url, function(data) {
  if (data) {
    var $ = cheerio.load(data);
    $("tbody a").each(function(i, e) {
      var href = $(e).attr("href");
      console.log(i, href);
      // An anchor without href yields undefined, which the URL parser
      // downstream cannot accept; leave such anchors out of the check list.
      if (href) {
        saveLinkArr[i] = href;
      }
    });
    // saveLinkArr is filled by index but declared as a plain object, which has
    // no .length; count its own keys instead.
    linkCount = Object.keys(saveLinkArr).length;
    jadeFilterLink(saveLinkArr);
    console.log("url-link done !");
  } else {
    console.log("download error !");
  }
});
/**
 * Probes `url` with a GET request and reports the outcome through `callback`.
 *
 * The client module follows the URL scheme (https: uses https, everything
 * else uses http) and the port defaults to 443 or 80 accordingly. The body is
 * drained and stored on `res.body`, but only the error (or null) and the URL
 * are handed to the callback.
 *
 * @param {string} url  Absolute URL to probe.
 * @param {function(?Error, string)} callback  Called with (err, url); err is
 *        null once the response has ended, an Error when the request fails,
 *        the response stream fails, or no response starts within 60 seconds.
 */
function requestUrl(url, callback) {
  var info = parse(url),
      // Previously every URL went through http on port 80, so an https link
      // was never checked against its real endpoint.
      isSecure = info.protocol === 'https:',
      client = isSecure ? https : http,
      options = {
        host: info.hostname,
        port: info.port || (isSecure ? 443 : 80),
        path: info.pathname + (info.search || ''),
        method: 'GET'
      },
      req = null,
      request_timeout = null;

  // Give up when no response has started within 60 seconds.
  request_timeout = setTimeout(function() {
    // Nulling the handle marks the timeout as fired for the 'error' handler below.
    request_timeout = null;
    req.abort();
    callback(new Error('Request timeout'), url);
  }, 60000);

  req = client.request(options, function(res) {
    clearTimeout(request_timeout);
    var chunks = [],
        length = 0;
    res.on('data', function(chunk) {
      length += chunk.length;
      chunks.push(chunk);
    }).on('end', function() {
      // Buffer.concat replaces the deprecated `new Buffer(size)` constructor
      // and the manual copy loop that filled it.
      res.body = Buffer.concat(chunks, length);
      callback(null, url);
    }).on('error', function(err) {
      callback(err, url);
    });
  }).on('error', function(err) {
    // node 0.5.x and later: calling req.abort() raises one "socket hang up"
    // error, so check whether the timeout already fired; if it did, the
    // failure has been reported and must not be reported again.
    if (request_timeout) {
      clearTimeout(request_timeout);
      callback(err, url);
    }
  });
  req.end();
}
/**
 * Result handler for requestUrl. A falsy `err` is ignored; otherwise the
 * failure is logged, a "<url> <error>" line is appended to errUrlList and the
 * whole list is passed to outPrint.
 *
 * @param {?Error} err     Failure reported for the URL, or a falsy value on success.
 * @param {string} errUrl  URL the result belongs to.
 */
function callback(err, errUrl) {
  if (!err) {
    return;
  }
  console.log(err + ' ' + errUrl);
  errUrlList.push(errUrl + ' ' + err);
  outPrint(errUrlList);
}
/**
 * Starts a requestUrl check, with `callback` as the result handler, for the
 * value stored under every enumerable key of `needFilterList`.
 *
 * @param {Object} needFilterList  Collection of links to check.
 */
function jadeFilterLink(needFilterList) {
  var name;
  for (name in needFilterList) {
    requestUrl(needFilterList[name], callback);
  }
}
// requestUrl("https://www.google.com/" , callback)
/**
 * Serialises `resData` as JSON with 2-space indentation and stores it in
 * ./err_url_list.json, replacing any previous content.
 *
 * @param {*} resData  Value to serialise with JSON.stringify.
 * @throws {Error} Whatever fs.writeFileSync throws when the file cannot be written.
 */
function outPrint(resData) {
  var filepath = './err_url_list.json';
  var resJson = JSON.stringify(resData, null, 2);
  // Called once per failing URL while other requests are still running.
  // Overlapping fs.writeFile calls on the same path are documented as unsafe,
  // so the snapshot is written synchronously: each one lands whole, in call order.
  fs.writeFileSync(filepath, resJson);
}