@jeffjade
2016-10-10T15:04:14.000000Z
字数 6977
阅读 1734
Node.js
Node.js 遍历文件夹,获取文件夹下所有文件列表
/**
 * List every file below a directory (recursively) and save the list to
 * `res.lst` inside that directory, one relative path per line.
 *
 * Usage: node <script> <root_dir>
 */
var fs = require('fs');
var path = require('path');

var root_path = process.argv[2];
var w_file = 'res.lst';

/**
 * Recursively collect the files found below `root`.
 *
 * @param {string} root - Directory to walk.
 * @returns {string[]} File paths relative to `root_path`, each prefixed
 *   with './' and using '/' as separator.
 */
function getAllFiles(root) {
  var res = [];
  fs.readdirSync(root).forEach(function (file) {
    var pathname = path.join(root, file);
    // lstat describes a symbolic link itself rather than its target, so a
    // link that points at a directory is listed as a file and not followed.
    var stat = fs.lstatSync(pathname);
    if (stat.isDirectory()) {
      res = res.concat(getAllFiles(pathname));
    } else {
      // path.relative is independent of whether root_path was given with a
      // trailing slash; separators are normalised to '/' for stable output.
      res.push('./' + path.relative(root_path, pathname).split(path.sep).join('/'));
    }
  });
  return res;
}

if (!root_path) {
  // Without an argument readdirSync would fail with an opaque TypeError.
  console.error('Usage: node ' + path.basename(process.argv[1] || 'script') + ' <root_dir>');
  process.exitCode = 1;
} else {
  var w_content = getAllFiles(root_path).join('\n');
  // fs.writeFile creates the file when it is missing and truncates it
  // otherwise, so no existence probe is needed. path.join guarantees a
  // separator between the directory and the file name.
  fs.writeFile(path.join(root_path, w_file), w_content, function (e) {
    if (e) throw e;
  });
}
参见: http://www.cnblogs.com/litao229/archive/2012/01/04/2312393.html
Node.js 按行读取文件
/**
 * Read ./test.txt line by line and print every line with its 1-based
 * line number.
 */
var fs = require('fs'),
    readline = require('readline');

var rd = readline.createInterface({
  input: fs.createReadStream('./test.txt'),
  output: process.stdout,
  terminal: false
});

var i = 1;
rd.on('line', function (line) {
  console.log('第' + i + '行:' + line);
  // `write` is not defined in this snippet; calling it unconditionally
  // throws a ReferenceError on the first line. It is only invoked when the
  // surrounding program actually provides it.
  if (typeof write === 'function') {
    write(line);
  }
  i++;
});
Node.js 过滤单个页面中的不合法链接:
/**
 * Script 1: download one page, collect the href of every `tbody a` link and
 * save them as JSON to ./jeffJadeLinkData.json.
 */
var http = require("http"),
    fs = require('fs'),
    cheerio = require("cheerio");

var url = "http://www.jeffjade.com/2016/03/30/104-front-end-tutorial/";
var saveLinkArr = {};

/**
 * Utility function that downloads a URL and invokes callback with the data.
 *
 * @param {string} url - Address to fetch over HTTP.
 * @param {function(?string)} callback - Receives the response body, or
 *   `null` when the request fails.
 */
function download(url, callback) {
  http.get(url, function (res) {
    var data = "";
    // Decode as UTF-8 in the stream so a multi-byte character split across
    // two chunks is not corrupted by the string concatenation below.
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
      data += chunk;
    });
    res.on("end", function () {
      callback(data);
    });
  }).on("error", function () {
    callback(null);
  });
}

/**
 * Serialise the collected links as pretty-printed JSON.
 *
 * @param {Object} resData - Map of link index to href.
 */
function writeOut(resData) {
  // The content is JSON, so it is stored with a .json extension.
  var filepath = './jeffJadeLinkData.json';
  var resJson = JSON.stringify(resData, null, 2);
  fs.writeFile(filepath, resJson, function (e) {
    if (e) throw e;
  });
}

download(url, function (data) {
  if (data) {
    var $ = cheerio.load(data);
    $("tbody a").each(function (i, e) {
      console.log(i, $(e).attr("href"));
      saveLinkArr[i] = $(e).attr("href");
    });
    writeOut(saveLinkArr);
    console.log("done");
  } else {
    console.log("download error !");
  }
});

// ==================================================
// Script 2: HTTP GET helper with request and response timeouts.
var http = require('http'),
    parse = require('url').parse;

/**
 * GET `url` and hand the response to `callback`, failing when no response
 * starts within 5 s or when the body does not finish within a further 5 s.
 *
 * The bodies of the 'end' and 'error' handlers were elided in the original
 * listing; they are filled in here following the callback(err, res)
 * convention used by the 'aborted' handler.
 *
 * @param {string} url - Address to fetch over HTTP.
 * @param {function(?Error, Object=)} callback - Called exactly once with an
 *   error, or with `null` and the response (its body in `res.body`).
 */
function urlget(url, callback) {
  var info = parse(url),
      path = info.pathname + (info.search || ''),
      options = {
        host: info.hostname,
        port: info.port || 80,
        path: path,
        method: 'GET'
      };
  var req = null,
      request_timeout = null,
      response_timeout = null,
      finished = false;

  // Single exit point. On node 0.5.x and later req.abort() additionally
  // triggers 'end'/'aborted' on the response and a "socket hang up" error
  // on the request; the flag keeps those from invoking callback again.
  function finish(err, res) {
    if (finished) {
      return;
    }
    finished = true;
    clearTimeout(request_timeout);
    clearTimeout(response_timeout);
    callback(err, res);
  }

  request_timeout = setTimeout(function () {
    finish(new Error('Request timeout'));
    req.abort();
  }, 5000);

  req = http.request(options, function (res) {
    clearTimeout(request_timeout);
    var chunks = [],
        length = 0;

    response_timeout = setTimeout(function () {
      finish(new Error('Response timeout'));
      req.abort();
    }, 5000);

    res.on('data', function (chunk) {
      length += chunk.length;
      chunks.push(chunk);
    }).on('end', function () {
      res.body = Buffer.concat(chunks, length);
      finish(null, res);
    }).on('error', function (err) {
      finish(err, res);
    }).on('aborted', function () {
      finish(new Error('Response aborted'), res);
    });
  }).on('error', function (err) {
    finish(err);
  });

  req.end();
}
// Source: https://fengmk2.com/ppt/those-things-using-nodejs/#slide-10
===============================
/**
 * Request every link listed in ./jeffJadeLinkData.json and record the ones
 * that fail in ./err_url_list.json.
 */
var http = require('http'),
    parse = require('url').parse,
    fs = require('fs'),
    needFilterList = require('./jeffJadeLinkData.json');

/**
 * GET `url` and report a failure through `callback`. Nothing is reported
 * when the response completes.
 *
 * @param {string} url - Address to check over HTTP.
 * @param {function(string, Error, Object=)} callback - Called at most once
 *   with the URL, the error and, when available, the response.
 */
function requestUrl(url, callback) {
  var info = parse(url),
      path = info.pathname + (info.search || ''),
      options = {
        host: info.hostname,
        port: info.port || 80,
        path: path,
        method: 'GET'
      };
  var req = null,
      request_timeout = null,
      response_timeout = null,
      settled = false;

  // Marks the request as finished. Returns false when it already was, so
  // the events that req.abort() triggers afterwards (node 0.5.x and later:
  // 'end'/'aborted' on the response, an error on the request) are ignored.
  function settle() {
    if (settled) {
      return false;
    }
    settled = true;
    clearTimeout(request_timeout);
    clearTimeout(response_timeout);
    return true;
  }

  function fail(err, res) {
    if (settle()) {
      callback(url, err, res);
    }
  }

  request_timeout = setTimeout(function () {
    fail(new Error('Request timeout'));
    req.abort();
  }, 5000);

  req = http.request(options, function (res) {
    clearTimeout(request_timeout);

    response_timeout = setTimeout(function () {
      fail(new Error('Response timeout'));
      req.abort();
    }, 5000);

    // The body is not used; it only has to be consumed so 'end' can fire.
    res.resume();
    res.on('end', function () {
      settle();
    }).on('error', function (err) {
      fail(err);
    }).on('aborted', function () {
      fail(new Error('Response aborted'), res);
    });
  }).on('error', function (err) {
    // Connection-level failures (unresolvable host, refused connection, ...)
    // are reported as well, otherwise dead links never reach the list.
    fail(err);
  });

  req.end();
}

/*===============================================================
Read Line: */
var errUrlList = [];

/**
 * Record a failing URL and rewrite the error list on disk.
 *
 * @param {string} errUrl - URL whose request failed.
 * @param {Error} errInfo - Failure reason (currently unused).
 * @param {Object=} errRes - Response, when one was received (currently unused).
 */
function callback(errUrl, errInfo, errRes) {
  console.log("err url: " + errUrl);
  errUrlList.push(errUrl);
  writeOut(errUrlList);
}

/** Fire a request for every entry of needFilterList. */
function jadeFilterLink() {
  Object.keys(needFilterList).forEach(function (key) {
    console.log("now turn:" + needFilterList[key]);
    requestUrl(needFilterList[key], callback);
  });
}

/**
 * Write the error list as pretty-printed JSON.
 *
 * @param {Array} resData - URLs collected so far.
 */
function writeOut(resData) {
  var filepath = './err_url_list.json';
  var resJson = JSON.stringify(resData, null, 2);
  // Synchronous on purpose: failures can arrive back to back, and several
  // overlapping fs.writeFile calls on one file are not safe.
  fs.writeFileSync(filepath, resJson);
}

jadeFilterLink();
/**
 * Download one page, extract the href of every `tbody a` link, request each
 * link and record the failing ones in ./err_url_list.json.
 */
var http = require("http"),
    fs = require('fs'),
    cheerio = require("cheerio"),
    parse = require('url').parse;

var url = "http://www.jeffjade.com/2016/03/30/104-front-end-tutorial/",
    saveLinkArr = {},
    errUrlList = [];

/**
 * Utility function that downloads a URL and invokes callback with the data.
 *
 * @param {string} url - Address to fetch over HTTP.
 * @param {function(?string)} callback - Receives the response body, or
 *   `null` when the request fails.
 */
function download(url, callback) {
  http.get(url, function (res) {
    var data = "";
    // Decode as UTF-8 in the stream so a multi-byte character split across
    // two chunks is not corrupted by the string concatenation below.
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
      data += chunk;
    });
    res.on("end", function () {
      callback(data);
    });
  }).on("error", function () {
    callback(null);
  });
}

download(url, function (data) {
  if (data) {
    var $ = cheerio.load(data);
    $("tbody a").each(function (i, e) {
      console.log(i, $(e).attr("href"));
      saveLinkArr[i] = $(e).attr("href");
    });
    jadeFilterLink(saveLinkArr);
    console.log("url-link done !");
  } else {
    console.log("download error !");
  }
});

/**
 * GET `url` and report the outcome through `callback`.
 *
 * @param {string} url - Address to check over HTTP.
 * @param {function(?Error, string)} callback - Receives an error (or `null`
 *   when the response completed) followed by the URL.
 */
function requestUrl(url, callback) {
  var info = parse(url),
      path = info.pathname + (info.search || ''),
      options = {
        host: info.hostname,
        port: info.port || 80,
        path: path,
        method: 'GET'
      },
      req = null,
      request_timeout = null;

  request_timeout = setTimeout(function () {
    request_timeout = null;
    req.abort();
    callback(new Error('Request timeout'), url);
  }, 60000);

  req = http.request(options, function (res) {
    clearTimeout(request_timeout);
    var chunks = [],
        length = 0;
    res.on('data', function (chunk) {
      length += chunk.length;
      chunks.push(chunk);
    }).on('end', function () {
      res.body = Buffer.concat(chunks, length);
      callback(null, url);
    }).on('error', function (err) {
      callback(err, url);
    });
  }).on('error', function (err) {
    // On node 0.5.x and later req.abort() triggers a "socket hang up"
    // error, so check whether the request already timed out; if it did,
    // the failure has been reported and must not be reported again.
    if (request_timeout) {
      clearTimeout(request_timeout);
      callback(err, url);
    }
  });

  req.end();
}

/**
 * Record a failed URL together with its error; successful requests are
 * ignored.
 *
 * @param {?Error} err - Failure reason, or `null` on success.
 * @param {string} errUrl - URL the result belongs to.
 */
function callback(err, errUrl) {
  if (err) {
    console.log(err + ' ' + errUrl);
    errUrlList.push(errUrl + ' ' + err);
    outPrint(errUrlList);
  }
}

/**
 * Fire a request for every link in the given collection.
 *
 * @param {Object} needFilterList - Map of link index to href.
 */
function jadeFilterLink(needFilterList) {
  Object.keys(needFilterList).forEach(function (key) {
    requestUrl(needFilterList[key], callback);
  });
}

/**
 * Write the error list as pretty-printed JSON.
 *
 * @param {Array} resData - Entries collected so far.
 */
function outPrint(resData) {
  var filepath = './err_url_list.json';
  var resJson = JSON.stringify(resData, null, 2);
  // Synchronous on purpose: failures can arrive back to back, and several
  // overlapping fs.writeFile calls on one file are not safe.
  fs.writeFileSync(filepath, resJson);
}