[关闭]
@songlaf 2016-05-05T09:48:46.000000Z 字数 3335 阅读 522

作业五【MapReduce词频统计】

未分类


  <!-- Maven build for the WordCount MapReduce exercise. -->
  <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>njt.song.study</groupId>
    <artifactId>hadoop</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>hadoop</name>
    <url>http://maven.apache.org</url>
    <properties>
      <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      <!-- Single source of truth for the Hadoop version so all Hadoop artifacts stay in sync. -->
      <hadoop.version>2.5.1</hadoop.version>
    </properties>
    <dependencies>
      <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>${hadoop.version}</version>
      </dependency>
      <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>3.8.1</version>
        <scope>test</scope>
      </dependency>
    </dependencies>
  </project>
  1. package njt.song.study.hadoop;
  2. import java.io.IOException;
  3. import org.apache.hadoop.conf.Configuration;
  4. import org.apache.hadoop.conf.Configured;
  5. import org.apache.hadoop.fs.Path;
  6. import org.apache.hadoop.io.IntWritable;
  7. import org.apache.hadoop.io.Text;
  8. import org.apache.hadoop.mapreduce.Job;
  9. import org.apache.hadoop.mapreduce.Mapper;
  10. import org.apache.hadoop.mapreduce.Reducer;
  11. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  12. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  13. import org.apache.hadoop.util.Tool;
  14. import org.apache.hadoop.util.ToolRunner;
  15. public class WordCount extends Configured implements Tool {
  16. public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable> {
  17. private static final IntWritable inputKey = new IntWritable(1);
  18. private Text outPutValue = new Text();
  19. protected void map(Object key, Text value, Context context)
  20. throws IOException, InterruptedException {
  21. String line = value.toString();
  22. String[] words = line.split("\t");
  23. for(String word:words){
  24. outPutValue.set(word);
  25. context.write(outPutValue, inputKey);
  26. }
  27. }
  28. }
  29. public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
  30. private IntWritable total = new IntWritable();
  31. @Override
  32. protected void reduce(Text key, Iterable<IntWritable> values,Context context)
  33. throws IOException, InterruptedException {
  34. int sum = 0;
  35. for(IntWritable value:values){
  36. sum += value.get();
  37. }
  38. total.set(sum);
  39. context.write(key,total);
  40. }
  41. }
  42. public static void main(String[] args) throws Exception{
  43. Configuration conf = new Configuration();
  44. int status = ToolRunner.run(conf, new WordCount(),args);
  45. System.exit(status);
  46. }
  47. public int run(String[] args) throws Exception {
  48. Configuration configuration = super.getConf();
  49. Job job = Job.getInstance(configuration,this.getClass().getSimpleName());
  50. job.setJarByClass(WordCount.class);
  51. Path inPath = new Path(args[0]);
  52. FileInputFormat.addInputPath(job, inPath);
  53. job.setMapperClass(WordCountMapper.class);
  54. job.setMapOutputKeyClass(Text.class);
  55. job.setMapOutputValueClass(IntWritable.class);
  56. job.setReducerClass(WordCountReducer.class);
  57. job.setOutputKeyClass(Text.class);
  58. job.setOutputValueClass(IntWritable.class);
  59. Path outPath = new Path(args[1]);
  60. FileOutputFormat.setOutputPath(job, outPath);
  61. boolean isSuccess = job.waitForCompletion(true);
  62. return isSuccess ? 0 : 1;
  63. }
  64. }
  1. bin/yarn jar /home/sjf/myWordcount.jar /input/word.txt /output

结果.jpg-300.9kB

查看结果

  1. bin/hdfs dfs -ls /output
  1. bin/hdfs dfs -cat /output/part-r-00000

无标题222.jpg-8.4kB

2.jpg-22.2kB

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注