/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pinot.plugin.ingestion.batch.hadoop;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.Serializable;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.pinot.common.segment.generation.SegmentGenerationUtils;
import org.apache.pinot.common.utils.TarCompressionUtils;
import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils;
import org.apache.pinot.plugin.ingestion.batch.hadoop.HadoopSegmentCreationMapper;
import org.apache.pinot.shaded.com.google.common.base.Preconditions;
import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.filesystem.PinotFS;
import org.apache.pinot.spi.filesystem.PinotFSFactory;
import org.apache.pinot.spi.ingestion.batch.runner.IngestionJobRunner;
import org.apache.pinot.spi.ingestion.batch.spec.PinotClusterSpec;
import org.apache.pinot.spi.ingestion.batch.spec.PinotFSSpec;
import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
import org.apache.pinot.spi.plugin.PluginManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

/**
 * Ingestion job runner that builds Pinot segments with a map-only Hadoop MapReduce job.
 *
 * <p>{@link #run()} lists the input files, writes one small descriptor file per input file into
 * {@code <stagingDir>/input} (each containing {@code "<inputFileURI> <sequenceId>"}), submits a job
 * whose mapper class is {@link #getMapperClass()}, and finally moves everything found under
 * {@code <stagingDir>/segmentTar} into the configured output directory. The staging directory is
 * removed when the run ends, whether it succeeded or not.
 *
 * <p>The staging directory is read from {@code executionFrameworkSpec.extraConfigs.stagingDir} and
 * must use the same URI scheme as the output directory, because both are accessed through the same
 * {@link PinotFS} instance.
 */
public class HadoopSegmentGenerationJobRunner extends Configured implements IngestionJobRunner, Serializable {
    private static final Logger LOGGER = LoggerFactory.getLogger(HadoopSegmentGenerationJobRunner.class);

    /** Hadoop configuration key under which the YAML-serialized job spec is handed to the job. */
    public static final String SEGMENT_GENERATION_JOB_SPEC = "segmentGenerationJobSpec";

    // Keys looked up in executionFrameworkSpec.extraConfigs.
    private static final String DEPS_JAR_DIR_FIELD = "dependencyJarDir";
    private static final String STAGING_DIR_FIELD = "stagingDir";

    // Sub-directories created under the staging directory.
    private static final String SEGMENT_TAR_SUBDIR_NAME = "segmentTar";
    private static final String DEPS_JAR_SUBDIR_NAME = "dependencyJars";

    private SegmentGenerationJobSpec _spec;

    /**
     * Creates an un-initialized runner with a fresh Hadoop {@link Configuration} in which
     * {@code mapreduce.job.user.classpath.first} is set to {@code true}. {@link #init} must be called
     * before {@link #run()}.
     */
    public HadoopSegmentGenerationJobRunner() {
        this.setConf(new Configuration());
        this.getConf().set("mapreduce.job.user.classpath.first", "true");
    }

    /**
     * Creates a runner and immediately initializes it with the given spec.
     *
     * @param spec the segment generation job spec, validated by {@link #init}
     */
    public HadoopSegmentGenerationJobRunner(SegmentGenerationJobSpec spec) {
        this();
        this.init(spec);
    }

    /**
     * Stores and validates the job spec, filling in values that can be derived.
     *
     * <p>When {@code schemaURI} or {@code tableConfigURI} is absent from the table spec, it is generated
     * from the controller URI of the first Pinot cluster spec and the table name. A missing
     * {@code extraConfigs} map is replaced with an empty mutable map.
     *
     * @param spec the segment generation job spec; it is mutated in place
     * @throws RuntimeException if a required property is missing
     */
    @Override
    public void init(SegmentGenerationJobSpec spec) {
        _spec = spec;
        requirePresent(_spec.getInputDirURI(), "Missing property 'inputDirURI' in 'jobSpec' file");
        requirePresent(_spec.getOutputDirURI(), "Missing property 'outputDirURI' in 'jobSpec' file");
        requirePresent(_spec.getRecordReaderSpec(), "Missing property 'recordReaderSpec' in 'jobSpec' file");
        requirePresent(_spec.getTableSpec(), "Missing property 'tableSpec' in 'jobSpec' file");
        requirePresent(_spec.getTableSpec().getTableName(), "Missing property 'tableName' in 'tableSpec'");

        String tableName = _spec.getTableSpec().getTableName();
        if (_spec.getTableSpec().getSchemaURI() == null) {
            PinotClusterSpec clusterSpec = firstClusterSpecOrFail("Missing property 'schemaURI' in 'tableSpec'");
            _spec.getTableSpec().setSchemaURI(
                SegmentGenerationUtils.generateSchemaURI(clusterSpec.getControllerURI(), tableName));
        }
        if (_spec.getTableSpec().getTableConfigURI() == null) {
            PinotClusterSpec clusterSpec = firstClusterSpecOrFail("Missing property 'tableConfigURI' in 'tableSpec'");
            _spec.getTableSpec().setTableConfigURI(
                SegmentGenerationUtils.generateTableConfigURI(clusterSpec.getControllerURI(), tableName));
        }

        // Fail with the same kind of message as the other checks instead of a bare NullPointerException.
        requirePresent(_spec.getExecutionFrameworkSpec(), "Missing property 'executionFrameworkSpec' in 'jobSpec' file");
        if (_spec.getExecutionFrameworkSpec().getExtraConfigs() == null) {
            _spec.getExecutionFrameworkSpec().setExtraConfigs(new HashMap<>());
        }
    }

    /**
     * Runs the segment generation job end to end: registers the configured file systems, stages the
     * input descriptors, submits the MapReduce job and waits for it, then moves the produced files from
     * the staging directory to the output directory.
     *
     * @throws RuntimeException if the staging and output directories use different schemes, or if the
     *     MapReduce job does not complete successfully
     * @throws Exception on any file-system, URI-parsing or job-submission failure
     */
    @Override
    public void run() throws Exception {
        registerPinotFileSystems();

        URI inputDirURI = withLocalFileFallback(new URI(_spec.getInputDirURI()), _spec.getInputDirURI());
        PinotFS inputDirFS = PinotFSFactory.create(inputDirURI.getScheme());
        List<String> filteredFiles = SegmentGenerationUtils.listMatchedFilesWithRecursiveOption(inputDirFS, inputDirURI,
            _spec.getIncludeFileNamePattern(), _spec.getExcludeFileNamePattern(), _spec.isSearchRecursively());

        URI outputDirURI = withLocalFileFallback(new URI(_spec.getOutputDirURI()), _spec.getOutputDirURI());
        PinotFS outputDirFS = PinotFSFactory.create(outputDirURI.getScheme());
        outputDirFS.mkdir(outputDirURI);

        String stagingDir = _spec.getExecutionFrameworkSpec().getExtraConfigs().get(STAGING_DIR_FIELD);
        Preconditions.checkNotNull(stagingDir, "Please set config: stagingDir under 'executionFrameworkSpec.extraConfigs'");
        URI stagingDirURI = withLocalFileFallback(URI.create(stagingDir), stagingDir);
        // The staging directory is manipulated through outputDirFS, so both must share a scheme.
        if (!outputDirURI.getScheme().equals(stagingDirURI.getScheme())) {
            throw new RuntimeException(String.format("The scheme of staging directory URI [%s] and output directory URI [%s] has to be same.", stagingDirURI, outputDirURI));
        }
        if (outputDirFS.exists(stagingDirURI)) {
            LOGGER.info("Clearing out existing staging directory: [{}]", stagingDirURI);
            outputDirFS.delete(stagingDirURI, true);
        }

        Path stagingInputDir = new Path(stagingDirURI.toString(), "input");
        Path stagingSegmentTarUri = new Path(stagingDirURI.toString(), SEGMENT_TAR_SUBDIR_NAME);
        int numDataFiles = filteredFiles.size();

        // Everything that writes into the staging directory sits inside the try block so that a failure
        // while staging inputs does not leave the directory behind.
        try {
            outputDirFS.mkdir(stagingDirURI);
            outputDirFS.mkdir(stagingInputDir.toUri());
            outputDirFS.mkdir(stagingSegmentTarUri.toUri());

            LOGGER.info("Creating segments with data files: {}", filteredFiles);
            stageInputFiles(filteredFiles, inputDirURI, outputDirFS, stagingInputDir);

            Job job = Job.getInstance(this.getConf());
            job.setJobName(this.getClass().getSimpleName());
            job.setJarByClass(SegmentGenerationJobSpec.class);
            job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
            addMapperJarToDistributedCache(job, outputDirFS, stagingDirURI);

            Configuration jobConf = job.getConfiguration();
            String hadoopTokenFileLocation = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
            if (hadoopTokenFileLocation != null) {
                jobConf.set("mapreduce.job.credentials.binary", hadoopTokenFileLocation);
            }

            // A non-positive or too-large parallelism falls back to one map task per data file.
            int jobParallelism = _spec.getSegmentCreationJobParallelism();
            if (jobParallelism <= 0 || jobParallelism > numDataFiles) {
                jobParallelism = numDataFiles;
            }
            jobConf.setInt("mapreduce.job.maps", jobParallelism);

            packPluginsToDistributedCache(job, outputDirFS, stagingDirURI);

            String dependencyJarsSrcDir = _spec.getExecutionFrameworkSpec().getExtraConfigs().get(DEPS_JAR_DIR_FIELD);
            if (dependencyJarsSrcDir != null) {
                Path dependencyJarsDestPath = new Path(stagingDirURI.toString(), DEPS_JAR_SUBDIR_NAME);
                addJarsToDistributedCache(job, new File(dependencyJarsSrcDir), outputDirFS, dependencyJarsDestPath.toUri(), false);
            }

            // The spec handed to the job points its output at the staging segment-tar directory; the
            // runner's own spec is restored afterwards even if serialization fails.
            _spec.setOutputDirURI(stagingSegmentTarUri.toUri().toString());
            try {
                jobConf.set(SEGMENT_GENERATION_JOB_SPEC, new Yaml().dumpAsMap(_spec));
            } finally {
                _spec.setOutputDirURI(outputDirURI.toString());
            }

            job.setMapperClass(getMapperClass());
            job.setNumReduceTasks(0);
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, stagingInputDir);
            FileOutputFormat.setOutputPath(job, new Path(stagingDir, "output"));

            job.waitForCompletion(true);
            if (!job.isSuccessful()) {
                throw new RuntimeException("Job failed: " + job);
            }

            LOGGER.info("Moving segment tars from staging directory [{}] to output directory [{}]", stagingDirURI, outputDirURI);
            SegmentGenerationJobUtils.moveFiles(outputDirFS, new Path(stagingDir, SEGMENT_TAR_SUBDIR_NAME).toUri(), outputDirURI, _spec.isOverwriteOutput());
        } finally {
            LOGGER.info("Trying to clean up staging directory: [{}]", stagingDirURI);
            outputDirFS.delete(stagingDirURI, true);
        }
    }

    /** Throws a {@link RuntimeException} carrying {@code message} when {@code value} is null. */
    private static void requirePresent(Object value, String message) {
        if (value == null) {
            throw new RuntimeException(message);
        }
    }

    /** Returns the first Pinot cluster spec, or throws with {@code missingPropertyMessage} if none is configured. */
    private PinotClusterSpec firstClusterSpecOrFail(String missingPropertyMessage) {
        PinotClusterSpec[] clusterSpecs = _spec.getPinotClusterSpecs();
        if (clusterSpecs == null || clusterSpecs.length == 0) {
            throw new RuntimeException(missingPropertyMessage);
        }
        return clusterSpecs[0];
    }

    /** Registers every file system listed in the job spec; a spec without the section registers nothing. */
    private void registerPinotFileSystems() {
        List<PinotFSSpec> pinotFSSpecs = _spec.getPinotFSSpecs();
        if (pinotFSSpecs == null) {
            return;
        }
        for (PinotFSSpec pinotFSSpec : pinotFSSpecs) {
            PinotFSFactory.register(pinotFSSpec.getScheme(), pinotFSSpec.getClassName(), new PinotConfiguration(pinotFSSpec));
        }
    }

    /** Returns {@code uri} unchanged when it has a scheme; otherwise treats {@code rawPath} as a local file path. */
    private static URI withLocalFileFallback(URI uri, String rawPath) {
        return uri.getScheme() != null ? uri : new File(rawPath).toURI();
    }

    /**
     * Writes one descriptor file per data file into {@code stagingInputDir}.
     *
     * <p>With a global sequence id, the id is the file's index in {@code dataFiles} and inputs are
     * resolved against {@code inputDirURI}. Otherwise files are grouped by parent directory, sorted
     * within each group, and numbered from zero per directory.
     */
    private void stageInputFiles(List<String> dataFiles, URI inputDirURI, PinotFS outputDirFS, Path stagingInputDir)
        throws Exception {
        if (SegmentGenerationJobUtils.useGlobalDirectorySequenceId(_spec.getSegmentNameGeneratorSpec())) {
            for (int seqId = 0; seqId < dataFiles.size(); seqId++) {
                URI inputFileURI = SegmentGenerationUtils.getFileURI(dataFiles.get(seqId), inputDirURI);
                createInputFileUriAndSeqIdFile(inputFileURI, outputDirFS, stagingInputDir, seqId);
            }
            return;
        }

        Map<String, List<String>> filesByParentDir = new HashMap<>();
        for (String dataFile : dataFiles) {
            String parentDir = Paths.get(dataFile).getParent().toString();
            filesByParentDir.computeIfAbsent(parentDir, ignored -> new ArrayList<>()).add(dataFile);
        }

        // Sequence ids restart at zero in every directory, so they cannot double as the staged file name:
        // two directories would both write "<stagingInputDir>/0" and one descriptor would be lost or
        // rejected. A running counter keeps names unique; the sequence id still travels in the content.
        // NOTE(review): assumes the mapper reads only the descriptor content, not its file name - confirm.
        int stagedFileIndex = 0;
        for (Map.Entry<String, List<String>> dirEntry : filesByParentDir.entrySet()) {
            List<String> siblingFiles = dirEntry.getValue();
            Collections.sort(siblingFiles);
            URI parentDirURI = SegmentGenerationUtils.getDirectoryURI(dirEntry.getKey());
            for (int seqId = 0; seqId < siblingFiles.size(); seqId++) {
                URI inputFileURI = SegmentGenerationUtils.getFileURI(siblingFiles.get(seqId), parentDirURI);
                Path stagedFilePath = new Path(stagingInputDir, Integer.toString(stagedFileIndex++));
                writeInputDescriptor(inputFileURI, seqId, outputDirFS, stagedFilePath);
            }
        }
    }

    /** Stages the descriptor for {@code inputFileURI} as {@code <stagingInputDir>/<seqId>}. */
    private void createInputFileUriAndSeqIdFile(URI inputFileURI, PinotFS outputDirFS, Path stagingInputDir, int seqId) throws Exception {
        writeInputDescriptor(inputFileURI, seqId, outputDirFS, new Path(stagingInputDir, Integer.toString(seqId)));
    }

    /**
     * Writes {@code "<inputFileURI> <seqId>"} (UTF-8) to a local temp file and copies it to
     * {@code stagedFilePath} through {@code outputDirFS}. The temp file is always deleted afterwards.
     */
    private void writeInputDescriptor(URI inputFileURI, int seqId, PinotFS outputDirFS, Path stagedFilePath) throws Exception {
        File localFile = File.createTempFile("pinot-filepath-", ".txt");
        try {
            // Close the stream before uploading so the file is complete on disk when it is copied.
            try (DataOutputStream dataOutputStream = new DataOutputStream(new FileOutputStream(localFile))) {
                dataOutputStream.write((inputFileURI + " " + seqId).getBytes(StandardCharsets.UTF_8));
            }
            outputDirFS.copyFromLocalFile(localFile, stagedFilePath.toUri());
        } finally {
            FileUtils.deleteQuietly(localFile);
        }
    }

    /**
     * Returns the mapper class used by the job. Subclasses may override this to plug in another mapper.
     *
     * @return {@link HadoopSegmentCreationMapper} by default
     */
    protected Class<? extends Mapper<LongWritable, Text, LongWritable, Text>> getMapperClass() {
        return HadoopSegmentCreationMapper.class;
    }

    /**
     * Copies the jar (or class directory) that this class was loaded from into the staging directory and
     * adds the copy to the job's classpath.
     *
     * @param job the job being configured
     * @param outputDirFS file system used to write into the staging directory
     * @param stagingDirURI staging directory URI
     * @throws Exception if the code source cannot be resolved to a file or the copy fails
     */
    protected void addMapperJarToDistributedCache(Job job, PinotFS outputDirFS, URI stagingDirURI) throws Exception {
        File ourJar = new File(this.getClass().getProtectionDomain().getCodeSource().getLocation().toURI());
        Path distributedCacheJar = new Path(stagingDirURI.toString(), ourJar.getName());
        if (ourJar.isDirectory()) {
            outputDirFS.copyFromLocalDir(ourJar, distributedCacheJar.toUri());
        } else {
            outputDirFS.copyFromLocalFile(ourJar, distributedCacheJar.toUri());
        }
        job.addFileToClassPath(distributedCacheJar);
    }

    /**
     * Tars the Pinot plugin directories, uploads the tarball to the staging directory and registers it
     * as a job cache file. The {@code plugins.include} system property, when set, is copied into the job
     * configuration.
     *
     * <p>Nothing is packed when the plugin manager reports no directories, or when any reported
     * directory does not exist; both cases only log a warning.
     *
     * @param job the job being configured
     * @param outputDirFS file system used to write into the staging directory
     * @param stagingDirURI staging directory URI
     * @throws RuntimeException wrapping any failure while creating or uploading the tarball
     */
    protected void packPluginsToDistributedCache(Job job, PinotFS outputDirFS, URI stagingDirURI) {
        String[] pluginDirectories = PluginManager.get().getPluginsDirectories();
        if (pluginDirectories == null) {
            LOGGER.warn("Plugin directories is null, nothing to pack to distributed cache");
            return;
        }
        List<File> validPluginDirectories = new ArrayList<>();
        for (String pluginsDirPath : pluginDirectories) {
            File pluginsDir = new File(pluginsDirPath);
            if (!pluginsDir.exists()) {
                // A single missing directory aborts packing altogether (existing behavior, kept as is).
                LOGGER.warn("Cannot find Pinot plugins directory at [{}]", pluginsDirPath);
                return;
            }
            validPluginDirectories.add(pluginsDir);
        }
        File pluginsTarGzFile = new File("pinot-plugins.tar.gz");
        try {
            File[] files = validPluginDirectories.toArray(new File[0]);
            TarCompressionUtils.createCompressedTarFile(files, pluginsTarGzFile);
            Path cachedPluginsTarball = new Path(stagingDirURI.toString(), "pinot-plugins.tar.gz");
            outputDirFS.copyFromLocalFile(pluginsTarGzFile, cachedPluginsTarball.toUri());
            job.addCacheFile(cachedPluginsTarball.toUri());
        } catch (Exception e) {
            LOGGER.error("Failed to tar plugins directories and upload to staging dir", e);
            throw new RuntimeException(e);
        } finally {
            // The job references the staged copy; the local tarball in the working directory is scratch.
            FileUtils.deleteQuietly(pluginsTarGzFile);
        }
        String pluginsIncludes = System.getProperty("plugins.include");
        if (pluginsIncludes != null) {
            job.getConfiguration().set("plugins.include", pluginsIncludes);
        }
    }

    /**
     * Copies every {@code .jar} file found in {@code srcDir} to {@code dstDirUri} and adds each copy to
     * the job's classpath. A non-existent {@code srcDir} only logs a warning.
     *
     * @param job the job being configured
     * @param srcDir local directory to scan for jars
     * @param dstFS file system used to write the copies
     * @param dstDirUri destination directory for the copies
     * @param recursive whether sub-directories of {@code srcDir} are scanned as well
     * @throws Exception if copying a jar or registering it with the job fails
     */
    protected void addJarsToDistributedCache(Job job, File srcDir, PinotFS dstFS, URI dstDirUri, boolean recursive) throws Exception {
        if (!srcDir.exists()) {
            LOGGER.warn("No jars directory at [{}]", srcDir);
            return;
        }
        Path dstDirPath = new Path(dstDirUri);
        for (File jarFile : FileUtils.listFiles(srcDir, new String[]{"jar"}, recursive)) {
            LOGGER.info("Adding jar {} to distributed cache", jarFile);
            Path dstFilePath = new Path(dstDirPath, jarFile.getName());
            dstFS.copyFromLocalFile(jarFile, dstFilePath.toUri());
            job.addFileToClassPath(dstFilePath);
        }
    }
}

