|
|
Answer:
|
|
|
|
/**
 * Map phase of WordCount: splits each input line into whitespace-separated
 * tokens and emits the pair (token, 1) for every token found.
 *
 * Input:  (Object key, Text value)   — value is one line/document of text.
 * Output: (Text word, IntWritable 1) — one record per token occurrence.
 */
public static class TokenizerMapper
        extends Mapper<Object, Text, Text, IntWritable>
{
    // Constant count of 1, allocated once and reused for every emitted pair.
    private final static IntWritable ONE = new IntWritable(1);
    // Reusable output key object; Hadoop serializes it on write, so reuse is safe.
    private Text word = new Text();

    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException
    {
        // Walk the whitespace-delimited tokens of this input record,
        // emitting (token, 1) for each one.
        for (StringTokenizer tokens = new StringTokenizer(value.toString());
             tokens.hasMoreTokens(); )
        {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}
|
Notice that:
|
public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable>
{
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable
|
Notice that:
|
/**
 * Driver for the WordCount job: wires the mapper, combiner and reducer,
 * sets output types, and runs the job to completion.
 *
 * @param args args[0] = HDFS input path, args[1] = HDFS output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception
{
    // Fix: guard against missing arguments — the original indexed
    // args[0]/args[1] unconditionally and would die with an
    // ArrayIndexOutOfBoundsException instead of a usage message.
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <output path>");
        System.exit(2);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);   // Set map function
    // Combiner runs reduce locally on map output to cut shuffle traffic;
    // safe here because summing counts is associative and commutative.
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);    // Set reduce function
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Block until the job finishes; exit 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
|
Demo:
Go to aruba:
ssh cheung@aruba
|