#!/bin/bash
#
# Fetch a Spark binary distribution tarball and stage it under
# $TARGET_ROOT/tmp/spark, together with a spark_url.txt file recording the URL
# it was fetched from.
#
# Environment read by this script:
#   TARGET_ROOT         root directory under which tmp/spark is created
#   DIB_IMAGE_CACHE     directory holding the cached copy of the tarball
#   TMP_HOOKS_PATH      directory whose bin/cache-url helper is invoked
#   SPARK_DOWNLOAD_URL  optional; full URL of a custom Spark package. When it
#                       is unset or empty, the URL is derived from the two
#                       variables below.
#   DIB_CDH_VERSION     CDH version used to select the matching Spark build
#   DIB_SPARK_VERSION   Spark version to download
#   DIB_DEBUG_TRACE     optional; a value greater than 0 enables "set -x"

if [ "${DIB_DEBUG_TRACE:-0}" -gt 0 ]; then
    set -x
fi
set -eu
set -o pipefail

# "set -u" only rejects unset variables; an empty value here would make the
# paths below resolve relative to the filesystem root, so reject that as well.
: "${TARGET_ROOT:?TARGET_ROOT must be set and non-empty}"
: "${DIB_IMAGE_CACHE:?DIB_IMAGE_CACHE must be set and non-empty}"
: "${TMP_HOOKS_PATH:?TMP_HOOKS_PATH must be set and non-empty}"

tmp_dir="$TARGET_ROOT/tmp/spark"
mkdir -p "$tmp_dir"

# No custom Spark distribution package was supplied: derive the download URL
# from the requested CDH and Spark versions.
if [ -z "${SPARK_DOWNLOAD_URL:-}" ]; then
    # Map the CDH version to the Hadoop flavour used in the Spark package name.
    # INFO on hadoop versions: http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html
    case "$DIB_CDH_VERSION" in
        5.0)
            SPARK_HADOOP_DL=hadoop2.3
        ;;
        5.3)
            SPARK_HADOOP_DL=hadoop2.4
        ;;
        5.4)
            SPARK_HADOOP_DL=hadoop2.6
        ;;
        CDH4)
            SPARK_HADOOP_DL=cdh4
        ;;
        *)
            # Fatal: report on stderr so the reason is not lost in regular output.
            echo "WARNING: Cloudera CDH version $DIB_CDH_VERSION not supported." >&2
            echo "WARNING: use the SPARK_DOWNLOAD_URL variable to install a custom Spark version." >&2
            exit 1
        ;;
    esac

    # An empty version would silently produce a malformed URL, so fail early.
    : "${DIB_SPARK_VERSION:?DIB_SPARK_VERSION must be set and non-empty}"
    SPARK_DOWNLOAD_URL="http://archive.apache.org/dist/spark/spark-$DIB_SPARK_VERSION/spark-$DIB_SPARK_VERSION-bin-$SPARK_HADOOP_DL.tgz"
fi

echo "Downloading SPARK"
spark_file=$(basename "$SPARK_DOWNLOAD_URL")
cached_tar="$DIB_IMAGE_CACHE/$spark_file"

# All expansions are quoted so that paths containing whitespace and URLs
# containing glob characters such as '?' reach the commands as single,
# unmodified arguments.
# cache-url receives the URL and the cache path; presumably it downloads the
# former into the latter (helper is not defined in this script).
"$TMP_HOOKS_PATH/bin/cache-url" "$SPARK_DOWNLOAD_URL" "$cached_tar"
sudo install -D -g root -o root -m 0755 "$cached_tar" "$tmp_dir"

# Record the URL the tarball came from next to the staged package.
printf '%s\n' "$SPARK_DOWNLOAD_URL" > "$tmp_dir/spark_url.txt"
