@TedZhou
2018-12-06T02:19:12.000000Z
字数 2210
阅读 615
mongodb mapreduce javascript
Map-Reduce是一种计算模型,简单地说,就是将大批量的工作(数据)分解(MAP)执行,然后再将各部分的结果合并成最终结果(REDUCE)。
MongoDB提供的Map-Reduce非常灵活,可以高效地进行大规模数据的统计分析。
db.collection.mapReduce(
  function() { emit(key, value) },            // map 函数
  function(key, values) { return reduced },   // reduce 函数
  <collection>                                // out collection
)
// 第三个参数也可以传入更多选项:
{
  finalize: function(key, reduced) { return finalized },
  out: <collection> | {inline: true},
  query: <document>,
  sort: <document>,
  limit: <number>,
}
db.runCommand({
  mapReduce: <collection>,
  map: <function>,
  reduce: <function>,
  finalize: <function>,
  out: <output>,
  query: <document>,
  sort: <document>,
  limit: <number>,
  scope: <document>,
  jsMode: <boolean>,
  verbose: <boolean>,
  bypassDocumentValidation: <boolean>,
  collation: <document>
})
示例集合person存储各省市居民的姓名、性别等记录
1. {"name" : "姓名1", "gender" : "男", "city" : "城市a", "province" : "省份A"}
2. {"name" : "姓名2", "gender" : "女", "city" : "城市b", "province" : "省份B"}
3. ...
现用mapReduce统计各省市人口性别比例:
// Compute, for every (province, city) pair in the `person` collection,
// the total head count and the share of each gender.
db.person.mapReduce(
    // map: emit one counter object per person, keyed by province + city.
    function () {
        var key = {province: this.province, city: this.city};
        var value = {total: 1};
        if (this.gender === '男') {
            value.male = 1;
        } else if (this.gender === '女') {
            value.female = 1;
        } else {
            value.unknown = 1;
        }
        emit(key, value);
    },
    // reduce: add up the per-gender counters emitted for the same key.
    function (key, values) {
        var value = {};
        values.forEach(function (item) {
            // `item` may be a single emitted value or an already-accumulated
            // partial result, so every field is summed the same way.
            for (var k in item) {
                value[k] = (value[k] || 0) + item[k];
            }
        });
        // The value returned by reduce must not be an array; use an object
        // when several values are needed.
        return value;
    },
    {
        // Query condition used to filter the source documents.
        query: {},
        // Sorting the input by the emit key reduces the number of reduce
        // calls and speeds up execution.
        sort: {province: 1, city: 1},
        // finalize: turn every counter except `total` into a ratio of `total`.
        finalize: function (key, rValue) {
            for (var k in rValue) {
                if (k !== 'total') {
                    rValue[k] = rValue[k] / rValue.total;
                }
            }
            return rValue;
        },
        // Return the results inline instead of writing them to a collection.
        out: {inline: true},
    }
).find(); // NOTE(review): calling find() on the mapReduce result is a shell helper — confirm it exists in the shell version in use.
执行结果如下:
[
  {"_id" : {"province" : "省份A", "city" : "城市a"}, "value" : {"total" : 40.0, "male" : 0.425, "female" : 0.575}},
  {"_id" : {"province" : "省份A", "city" : "城市b"}, "value" : {"total" : 1.0, "male" : 1.0}},
  {"_id" : {"province" : "省份A", "city" : "城市c"}, "value" : {"total" : 150.0, "male" : 0.526, "female" : 0.473}},
  ...
]
