/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.indexer.IndexWriters;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CleaningJob
implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private Configuration conf;

    public Configuration getConf() {
        return this.conf;
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    public void delete(String crawldb, boolean noCommit) throws IOException, InterruptedException, ClassNotFoundException {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        LOG.info("CleaningJob: starting");
        Job job = Job.getInstance((Configuration)this.getConf(), (String)("Nutch CleaningJob: " + crawldb));
        Configuration conf = job.getConfiguration();
        FileInputFormat.addInputPath((Job)job, (Path)new Path(crawldb, "current"));
        conf.setBoolean("noCommit", noCommit);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        job.setMapOutputKeyClass(ByteWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setMapperClass(DBFilter.class);
        job.setReducerClass(DeleterReducer.class);
        job.setJarByClass(CleaningJob.class);
        conf.setBoolean("indexer.delete", true);
        try {
            boolean success = job.waitForCompletion(true);
            if (!success) {
                String message = NutchJob.getJobFailureLogMessage("CleaningJob", job);
                LOG.error(message);
                throw new RuntimeException(message);
            }
        }
        catch (ClassNotFoundException | InterruptedException e) {
            LOG.error(StringUtils.stringifyException((Throwable)e));
            throw e;
        }
        stopWatch.stop();
        LOG.info("CleaningJob: finished, elapsed: {} ms", (Object)stopWatch.getTime(TimeUnit.MILLISECONDS));
    }

    public int run(String[] args) throws IOException {
        if (args.length < 1) {
            String usage = "Usage: CleaningJob <crawldb> [-noCommit]";
            LOG.error("Missing crawldb.\n{}", (Object)usage);
            return 1;
        }
        boolean noCommit = false;
        if (args.length == 2 && args[1].equals("-noCommit")) {
            noCommit = true;
        }
        try {
            this.delete(args[0], noCommit);
        }
        catch (Exception e) {
            LOG.error("CleaningJob:", (Throwable)e);
            return -1;
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new CleaningJob(), (String[])args);
        System.exit(result);
    }

    public static class DeleterReducer
    extends Reducer<ByteWritable, Text, Text, ByteWritable> {
        private static final int NUM_MAX_DELETE_REQUEST = 1000;
        private int numDeletes = 0;
        private int totalDeleted = 0;
        private boolean noCommit = false;
        IndexWriters writers = null;

        public void setup(Reducer.Context context) {
            Configuration conf = context.getConfiguration();
            this.writers = IndexWriters.get(conf);
            try {
                this.writers.open(conf, "Deletion");
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
            this.noCommit = conf.getBoolean("noCommit", false);
        }

        public void cleanup(Reducer.Context context) throws IOException {
            if (this.totalDeleted > 0 && !this.noCommit) {
                this.writers.commit();
            }
            this.writers.close();
            LOG.info("CleaningJob: deleted a total of {} documents", (Object)this.totalDeleted);
        }

        public void reduce(ByteWritable key, Iterable<Text> values, Reducer.Context context) throws IOException {
            for (Text document : values) {
                this.writers.delete(document.toString());
                ++this.totalDeleted;
                context.getCounter("CleaningJobStatus", "Deleted documents").increment(1L);
            }
        }
    }

    public static class DBFilter
    extends Mapper<Text, CrawlDatum, ByteWritable, Text> {
        private ByteWritable OUT = new ByteWritable(3);

        public void map(Text key, CrawlDatum value, Mapper.Context context) throws IOException, InterruptedException {
            if (value.getStatus() == 3 || value.getStatus() == 7) {
                context.write((Object)this.OUT, (Object)key);
            }
        }
    }
}

