MR基础案例（四）二次排序

分手后的思念是犯贱 2024-04-18 12:54 73阅读 0赞

### 二次排序 ###

输入

20 21
    50 51
    50 53
    50 52
    50 54
    60 51
    60 53
    60 52
    60 56
    60 57
    70 58
    60 61
    70 54
    70 55
    70 56
    70 57
    70 58

结果:

20      21
    50      51
    50      52
    50      53
    50      54
    60      51
    60      52
    60      53
    60      56
    60      57
    60      61
    70      54
    70      55
    70      56
    70      57
    70      58
    70      58

方法1：  
第一列作为key，利用shuffle阶段的排序特点（默认只能升序）；第二列的值放到list中，在reduce端对list排序后遍历输出  
方法2：  
自定义数据类型

SortSecondaryDemo.java

package MR;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    import java.io.IOException;
    
    /**
     * Driver for the secondary-sort example job.
     *
     * <p>Each input line holds two whitespace-separated integers. The mapper packs
     * both into a {@link SecondarySortWritable} key and emits the second integer as
     * the value; the reducer writes every (key, value) pair it receives unchanged.
     * Any ordering of the output therefore comes from the key's {@code compareTo}.
     *
     * <p>Usage: {@code SortSecondaryDemo <input path> <output path>}
     */
    public class SortSecondaryDemo implements Tool {

        /** Configuration handed over by {@link ToolRunner}; created lazily if never set. */
        private Configuration conf;

        /**
         * Map phase: parses one line into a composite key and an integer value.
         */
        public static class MyMapper extends Mapper<LongWritable, Text, SecondarySortWritable, IntWritable> {

            // Both objects are reused across map() calls; context.write() is handed the
            // current contents each time, so no per-record allocation is needed.
            private final SecondarySortWritable ss = new SecondarySortWritable();
            private final IntWritable outValue = new IntWritable();

            /**
             * Emits {@code (first, second) -> second} for one input line.
             *
             * @param key     position key supplied by the input format (unused)
             * @param value   one line of text containing two integers
             * @param context sink for the emitted pair
             * @throws IOException           if the line has fewer than two fields, or the write fails
             * @throws NumberFormatException if a field is not a valid integer
             */
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String line = value.toString().trim();
                if (line.isEmpty()) {
                    // Blank lines (e.g. a trailing newline at end of file) carry no record.
                    return;
                }

                // Split on any run of whitespace so tabs and repeated spaces are accepted.
                String[] dig = line.split("\\s+");
                if (dig.length < 2) {
                    throw new IOException(
                            "Expected two whitespace-separated integers but got: '" + line + "'");
                }

                int second = Integer.parseInt(dig[1]);
                ss.setFirst(Integer.parseInt(dig[0]));
                ss.setSecond(second);
                outValue.set(second);
                context.write(ss, outValue);
            }
        }

        /**
         * Reduce phase: writes every value of a group together with the group's key.
         */
        public static class MyReducer extends Reducer<SecondarySortWritable, IntWritable, SecondarySortWritable, IntWritable> {

            /**
             * Passes each incoming pair straight through to the output.
             *
             * @param key     composite key of the current group
             * @param values  values that were grouped under {@code key}
             * @param context sink for the emitted pairs
             */
            @Override
            protected void reduce(SecondarySortWritable key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                for (IntWritable i : values) {
                    context.write(key, i);
                }
            }
        }

        /**
         * Stores the configuration so that {@link #run(String[])} builds the job from it.
         *
         * @param conf configuration to use for the job
         */
        @Override
        public void setConf(Configuration conf) {
            this.conf = conf;
        }

        /**
         * Returns the stored configuration, creating a default one on first use if
         * {@link #setConf(Configuration)} was never called.
         *
         * @return the configuration used by this tool
         */
        @Override
        public Configuration getConf() {
            if (conf == null) {
                conf = new Configuration();
            }
            return conf;
        }

        /**
         * Configures and submits the job, then waits for it to finish.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
         */
        @Override
        public int run(String[] args) throws Exception {
            if (args == null || args.length < 2) {
                System.err.println("Usage: SortSecondaryDemo <input path> <output path>");
                return 2;
            }

            Configuration conf = getConf();
            Job job = Job.getInstance(conf, "ssjob");
            job.setJarByClass(SortSecondaryDemo.class);

            // set inputpath and outputpath
            setInputAndOutput(job, conf, args);

            job.setMapperClass(MyMapper.class);
            job.setMapOutputKeyClass(SecondarySortWritable.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(SecondarySortWritable.class);
            job.setOutputValueClass(IntWritable.class);

            // Submit and block until completion.
            return job.waitForCompletion(true) ? 0 : 1;
        }

        /**
         * Entry point: runs the tool through {@link ToolRunner} and exits with its status.
         */
        public static void main(String[] args) throws Exception {
            int isok = ToolRunner.run(new Configuration(), new SortSecondaryDemo(), args);
            System.exit(isok);
        }

        /**
         * Registers the input and output paths on the job.
         *
         * <p>An existing output directory is not removed here.
         * NOTE(review): Hadoop normally refuses to start a job whose output directory
         * already exists - confirm whether callers are expected to clean it up first.
         *
         * @param job  job to configure
         * @param conf job configuration (currently unused; kept for API compatibility)
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws IllegalArgumentException if fewer than two arguments are supplied
         * @throws IllegalStateException    if a path cannot be registered on the job
         */
        public static void setInputAndOutput(Job job, Configuration conf, String[] args) {
            if (args == null || args.length < 2) {
                throw new IllegalArgumentException("Expected arguments: <input path> <output path>");
            }

            try {
                FileInputFormat.addInputPath(job, new Path(args[0]));
                FileOutputFormat.setOutputPath(job, new Path(args[1]));
            } catch (IOException e) {
                // Fail fast with the cause attached instead of printing the stack trace and
                // letting the job fail later with a less specific error.
                throw new IllegalStateException(
                        "Cannot configure paths: input=" + args[0] + ", output=" + args[1], e);
            }
        }

    }

SecondarySortWritable.java

package MR;
    
    import org.apache.hadoop.io.WritableComparable;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import java.util.Objects;
    
    /**
     *
     */
    public class SecondarySortWritable implements WritableComparable<SecondarySortWritable> {
        
        private int first;
        private int second;
    
        public void write(DataOutput out) throws IOException {
        
            out.writeInt(this.first);
            out.writeInt(this.second);
        }
    
        public void readFields(DataInput in) throws IOException {
        
            this.first = in.readInt();
            this.second = in.readInt();
        }
    
        int getFirst() {
        
            return first;
        }
    
        void setFirst(int first) {
        
            this.first = first;
        }
    
        public int getSecond() {
        
            return second;
        }
    
        void setSecond(int second) {
        
            this.second = second;
        }
    
    
        public int compareTo(SecondarySortWritable o) {
        
            return this.first - o.first;
            //return o.second - this.second; //降序
        }
    
        @Override
        public boolean equals(Object o) {
        
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            SecondarySortWritable that = (SecondarySortWritable) o;
            return first == that.first &&
                    second == that.second;
        }
    
        @Override
        public int hashCode() {
        
    
            return Objects.hash(first, second);
        }
    
        @Override
        public String toString() {
        
            return "first=" + first +
                    ", second=" + second;
        }
    }