#!/usr/bin/env bash
#/ Usage: ghe-backup [-hvi] [--version] [--skip-checks]
#/
#/ Take snapshots of all GitHub Enterprise data, including Git repository data,
#/ the MySQL database, instance settings, GitHub Pages data, etc.
#/
#/ OPTIONS:
#/   -v | --verbose       Enable verbose output.
#/   -h | --help          Show this message.
#/        --version       Display version information.
#/   -i | --incremental   Incremental backup
#/        --skip-checks   Skip storage/sw version checks
#/

# Enable errexit: stop as soon as an unguarded command fails.
set -e

# Consume leading option flags, exporting one GHE_* switch for each flag that
# is recognised. Scanning stops at the first argument that does not look like
# an option; an unrecognised option is reported on stderr and aborts the run.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -h|--help)        export GHE_SHOW_HELP=true ;;
    --version)        export GHE_SHOW_VERSION=true ;;
    -v|--verbose)     export GHE_VERBOSE=true ;;
    -i|--incremental) export GHE_INCREMENTAL=true ;;
    --skip-checks)    export GHE_SKIP_CHECKS=true ;;
    -*)
      echo "Error: invalid argument: '$1'" 1>&2
      exit 1
      ;;
    *)
      # First non-option argument: leave it (and the rest) in place.
      break
      ;;
  esac
  # Only reached for a recognised flag; drop it and look at the next one.
  shift
done


# Export the name of this script before the shared configuration is loaded.
# NOTE(review): presumably ghe-backup-config reads CALLING_SCRIPT to tailor its
# behavior to the caller -- confirm in the sourced file.
export CALLING_SCRIPT="ghe-backup"

# Bring in the backup configuration
# The path is resolved relative to the directory containing this script, so
# the script works regardless of the caller's working directory.
# NOTE(review): the helper functions used throughout this script (log_*,
# ghe_*, bm_*, ...) are not defined here; presumably they come from this file.
# shellcheck source=share/github-backup-utils/ghe-backup-config
. "$( dirname "${BASH_SOURCE[0]}" )/../share/github-backup-utils/ghe-backup-config"


# Check to make sure moreutils parallel is installed and working properly
ghe_parallel_check


# Failed steps are tracked in two places: the `failures` variable for steps
# run directly by this script, and a temporary file that the command snippets
# built later in the script append to.
failures=""
failures_file=$(mktemp -t backup-utils-backup-failures-XXXXXX)

# Throttle CPU and IO so a backup does not thrash the host. A non-empty value
# already present in the environment wins over the default.
: "${GHE_NICE:=nice -n 19}"
: "${GHE_IONICE:=ionice -c 3}"
export GHE_NICE GHE_IONICE

# Create the timestamped snapshot directory for this run and make it the
# working directory. The 'incomplete' marker flags the snapshot as unfinished;
# it is removed only once every backup step has succeeded.
mkdir -p "$GHE_SNAPSHOT_DIR"
cd "$GHE_SNAPSHOT_DIR"
touch incomplete

# Probe the snapshot filesystem before doing any real work. The backup is
# aborted when symbolic links or hard links are unsupported, or when rsync
# fails to hard-link a tree containing a dangling symlink.
#
# The scratch directories live in the snapshot directory and are removed on
# exit by the trap, as well as explicitly once all probes have passed.
trap 'rm -rf src dest1 dest2' EXIT
mkdir -p src
touch src/testfile

# Probe 1: create a dangling symbolic link inside src/.
ln -s /data/does/not/exist/hooks/ src/ >/dev/null 2>&1 || {
  log_error "Error: the filesystem containing $GHE_DATA_DIR does not support symbolic links. \nGit repositories contain symbolic links that need to be preserved during a backup." 1>&2
  exit 1
}

# Probe 2: copy src/ to dest1, then to dest2 hard-linking against dest1.
# The combined rsync output is only shown when one of the two runs fails.
output=$(rsync -a src/ dest1 2>&1 && rsync -av src/ --link-dest=../dest1 dest2 2>&1) || {
  log_error "Error: rsync encountered an error that could indicate a problem with permissions,\n hard links, symbolic links, or another issue that may affect backups." 1>&2
  echo "$output"
  exit 1
}

# Probe 3: a real hard link means both copies of testfile share one inode.
[ "$(stat -c %i dest1/testfile)" = "$(stat -c %i dest2/testfile)" ] || {
  log_error "Error: the filesystem containing $GHE_DATA_DIR does not support hard links.\n Backup Utilities use hard links to store backup data efficiently." 1>&2
  exit 1
}

rm -rf src dest1 dest2

# To prevent multiple backup runs happening at the same time, we create an
# in-progress file with the timestamp and pid of the backup process,
# giving us a form of locking.
#
# cleanup is installed as the EXIT trap and undoes that bookkeeping:
#   - removes ../in-progress, but only when it names this snapshot and this
#     process, so a lock owned by another run is never deleted;
#   - removes the failures temp file and the in-progress-backup marker;
#   - removes the progress directory when PROGRESS_DIR is set and exists;
#   - asks ghe-ssh to clean up SSH multiplexing and closes the benchmark.
#
# Globals read: GHE_SNAPSHOT_TIMESTAMP, GHE_DATA_DIR, PROGRESS_DIR,
#               failures_file
# Arguments:    none
# Note: relative paths assume the working directory is the snapshot directory.
cleanup () {
  local progress snapshot pid

  if [ -f ../in-progress ]; then
    # File format is "<snapshot timestamp> <pid>".
    progress=$(cat ../in-progress)
    snapshot=$(echo "$progress" | cut -d ' ' -f 1)
    pid=$(echo "$progress" | cut -d ' ' -f 2)
    if [ "$snapshot" = "$GHE_SNAPSHOT_TIMESTAMP" ] && [ "$$" = "$pid" ]; then
      unlink ../in-progress
    fi
  fi

  rm -rf "$failures_file"
  rm -f "${GHE_DATA_DIR}/in-progress-backup"

  # Quoted so that a progress directory whose path contains whitespace is
  # still detected and removed; ':?' guards against an empty expansion.
  if [ -n "$PROGRESS_DIR" ] && [ -d "$PROGRESS_DIR" ]; then
    rm -rf "${PROGRESS_DIR:?}"
  fi

  # Cleanup SSH multiplexing
  ghe-ssh --clean

  # Deliberately the same expression as the matching bm_start call in this
  # script, so that both always produce the same benchmark name.
  bm_end "$(basename $0)"
}

# From here on every exit path runs cleanup; an interrupt (^C) is turned into
# a normal exit so that the EXIT trap fires as well.
trap cleanup EXIT
trap 'exit $?' INT


# Refuse to run while a restore is in progress.
ghe_restore_check

# A symlink named in-progress is treated as the lock of a previous
# ghe-backup version; it is never removed automatically.
if [ -h ../in-progress ]; then
  log_error "Detected a backup already in progress from a previous version of ghe-backup. \nIf there is no backup in progress anymore, please remove \nthe $GHE_DATA_DIR/in-progress file." >&2
  exit 1
fi

# A regular in-progress file records "<snapshot> <pid>". If that pid is still
# alive the backup is assumed to be running and we bail out; otherwise the
# lock is stale and can be dropped.
if [ -f ../in-progress ]; then
  progress=$(<../in-progress)
  snapshot=$(echo "$progress" | cut -d ' ' -f 1)
  pid=$(echo "$progress" | cut -d ' ' -f 2)

  if ps -p "$pid" >/dev/null 2>&1; then
    log_error "Error: A backup of $GHE_HOSTNAME may still be running on PID $pid. \nIf PID $pid is not a process related to the backup utilities, please remove \nthe $GHE_DATA_DIR/in-progress file and try again." 1>&2
    exit 1
  fi

  # Safe to remove: ghe-prune-snapshots will clean up the failed backup.
  unlink ../in-progress
fi

# Perform a host connection check and establish the remote appliance version.
# The version is available in the GHE_REMOTE_VERSION variable and also written
# to a version file in the snapshot directory itself.
# ghe_remote_version_required should be run before any other instances of ghe-ssh
# to ensure that there are no problems with host key verification.
ghe_remote_version_required
echo "$GHE_REMOTE_VERSION" > version

# Setup progress tracking
#
# Three small state files are written under PROGRESS_DIR: total (number of
# steps), type (kind of run) and progress (steps done so far). PROGRESS_DIR is
# not assigned in this script and must already be set at this point. It is
# quoted in every redirection: an unquoted target that expands to more than
# one word makes bash fail with "ambiguous redirect".
init-progress
export PROGRESS_TOTAL=14 # Minimum number of steps in backup is 14
echo "$PROGRESS_TOTAL" > "$PROGRESS_DIR/total"
export PROGRESS_TYPE="Backup"
echo "$PROGRESS_TYPE" > "$PROGRESS_DIR/type"
export PROGRESS=0 # Used to track progress of backup
echo "$PROGRESS" > "$PROGRESS_DIR/progress"

# Count the steps that only run for some configurations so that the total
# matches what this run will actually execute.
OPTIONAL_STEPS=0
# Backup actions+mssql
if ghe-ssh "$GHE_HOSTNAME" -- 'ghe-config --true app.actions.enabled'; then
  OPTIONAL_STEPS=$((OPTIONAL_STEPS + 2))
fi

# Backup fsck
if [ "$GHE_BACKUP_FSCK" = "yes" ]; then
  OPTIONAL_STEPS=$((OPTIONAL_STEPS + 1))
fi

# Backup minio
if ghe-ssh "$GHE_HOSTNAME" -- 'ghe-config --true app.minio.enabled'; then
  OPTIONAL_STEPS=$((OPTIONAL_STEPS + 1))
fi

# Backup pages
if [ "$GHE_BACKUP_PAGES" != "no" ]; then
  OPTIONAL_STEPS=$((OPTIONAL_STEPS + 1))
fi

# Final total = baseline steps + optional steps enabled for this run.
PROGRESS_TOTAL=$((OPTIONAL_STEPS + PROGRESS_TOTAL))
echo "$PROGRESS_TOTAL" > "$PROGRESS_DIR/total"

# Succeeds (returns 0) when appliance version MAJOR.MINOR is older than 3.10,
# the first release accepted for incremental backups.
#
# Arguments: $1 - major version number, $2 - minor version number
#
# Major and minor are compared together, so releases with a higher major
# version and a small minor (for example 4.0) are not treated as too old.
# Non-numeric input makes the numeric tests fail and yields "not too old",
# so missing version information does not block the backup here.
_ghe_incremental_version_too_old() {
  [ "$1" -lt 3 ] || { [ "$1" -eq 3 ] && [ "$2" -lt 10 ]; }
}

# check that incremental settings are valid if set
is_inc=$(is_incremental_backup_feature_on)

if [ "$is_inc" = true ]; then
  if _ghe_incremental_version_too_old "$GHE_VERSION_MAJOR" "$GHE_VERSION_MINOR"; then
    log_error "Can only perform incremental backups on enterprise version 3.10 or higher"
    exit 1
  fi

  incremental_backup_check
  # If everything is ok, check if we have hit GHE_MAX_INCREMENTAL_BACKUPS, performing pruning actions if necessary
  check_for_incremental_max_backups
  # initialize incremental backup if it hasn't been done yet
  incremental_backup_init
fi

# Take the lock: record "<snapshot timestamp> <pid>" both next to the
# snapshot directories and in the data directory.
printf '%s %s\n' "$GHE_SNAPSHOT_TIMESTAMP" "$$" > ../in-progress
printf '%s %s\n' "$GHE_SNAPSHOT_TIMESTAMP" "$$" > "${GHE_DATA_DIR}/in-progress-backup"

# Open the benchmark for the whole run and remember when it started so the
# total runtime can be reported at the end.
bm_start "$(basename $0)"
START_TIME=$(date +%s)
log_info "Starting backup of $GHE_HOSTNAME with backup-utils v$BACKUP_UTILS_VERSION in snapshot $GHE_SNAPSHOT_TIMESTAMP"



# Any non-empty GHE_ALLOW_REPLICA_BACKUP triggers the warning.
[ -z "$GHE_ALLOW_REPLICA_BACKUP" ] ||
  echo "Warning: backing up a high availability replica may result in inconsistent or unreliable backups."

# Print the value assigned to KEY in an os-release style file.
#
# Arguments: $1 - key name (e.g. NAME, VERSION_ID)
#            $2 - file to read (optional, defaults to /etc/os-release)
# Outputs:   the value of the first KEY= line with one pair of surrounding
#            double or single quotes removed; nothing if the key is absent.
#
# os-release values may be written with or without quotes, so both forms
# have to be handled.
_os_release_value() {
  local key=$1 file=${2:-/etc/os-release}
  sed -n "s/^${key}=//p" "$file" |
    head -n 1 |
    sed -e 's/^"\(.*\)"$/\1/' -e "s/^'\(.*\)'\$/\1/"
}

# Output system information of the backup host

# If /etc/os-release exists, use it to get the OS version
if [ -f /etc/os-release ]; then
  OS_NAME=$(_os_release_value NAME)
  VERSION_ID=$(_os_release_value VERSION_ID)
  echo "Running on: $OS_NAME $VERSION_ID"
else
  echo "Running on: Unknown OS"
fi

# If nproc command exists, use it to get the number of CPUs
if command -v nproc >/dev/null 2>&1; then
  echo "CPUs: $(nproc)"
else
  echo "CPUs: Unknown"
fi

# If the free command exists, use it to get the memory details
# (columns 2-7 of the "Mem:" row of `free -m`, in MiB).
if command -v free >/dev/null 2>&1; then
  echo "Memory $(free -m | awk '/^Mem:/ {print "total/used/free+share/buff/cache: " $2 "/" $3 "/" $4 "+" $5 "/" $6 "/" $7}')"
else
  echo "Memory: Unknown"
fi


# Log backup start message in /var/log/syslog on remote instance
ghe_remote_logger "Starting backup from $(hostname) with backup-utils v$BACKUP_UTILS_VERSION in snapshot $GHE_SNAPSHOT_TIMESTAMP ..."

# Keep a non-empty GHE_BACKUP_STRATEGY from the environment; otherwise use
# whatever ghe-backup-strategy prints. Because the command substitution is
# part of the `export` command, a failure of ghe-backup-strategy is masked by
# export's own exit status and does not stop the script under `set -e`.
export GHE_BACKUP_STRATEGY=${GHE_BACKUP_STRATEGY:-$(ghe-backup-strategy)}

# Record the strategy with the snapshot so we will know how to restore.
echo "$GHE_BACKUP_STRATEGY" > strategy

# Create benchmark file
# (standard output of bm_init is discarded)
bm_init > /dev/null

# Best effort: a failure here only produces a warning and is not added to the
# list of failed backup steps.
ghe-backup-store-version  ||
log_warn "Warning: storing backup-utils version remotely failed."

# Steps that run one after another. A failing step does not stop the backup;
# its name is appended to $failures and reported at the end of the run.

log_info "Backing up GitHub secrets ..."
if ! ghe-backup-secrets; then
  failures="$failures secrets"
fi

log_info "Backing up GitHub settings ..."
if ! ghe-backup-settings; then
  failures="$failures settings"
fi

# The two key exports below stream the remote command's output straight into
# a file in the snapshot directory and are benchmarked individually.
log_info "Backing up SSH authorized keys ..."
bm_start "ghe-export-authorized-keys"
if ! ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-authorized-keys' > authorized-keys.json; then
  failures="$failures authorized-keys"
fi
bm_end "ghe-export-authorized-keys"

log_info "Backing up SSH host keys ..."
bm_start "ghe-export-ssh-host-keys"
if ! ghe-ssh "$GHE_HOSTNAME" -- 'ghe-export-ssh-host-keys' > ssh-host-keys.tar; then
  failures="$failures ssh-host-keys"
fi
bm_end "ghe-export-ssh-host-keys"

if ! ghe-backup-mysql; then
  failures="$failures mysql"
fi

# MSSQL and Actions data are backed up only when Actions is enabled on the
# appliance. Their standard output is sent to file descriptor 3, which is not
# opened in this script (presumably set up by the sourced configuration).
if ghe-ssh "$GHE_HOSTNAME" -- 'ghe-config --true app.actions.enabled'; then
  log_info "Backing up MSSQL databases ..."
  if ! ghe-backup-mssql 1>&3; then
    failures="$failures mssql"
  fi

  log_info "Backing up Actions data ..."
  if ! ghe-backup-actions 1>&3; then
    failures="$failures actions"
  fi
fi

# Minio data is backed up only when Minio is enabled on the appliance.
if ghe-ssh "$GHE_HOSTNAME" -- 'ghe-config --true app.minio.enabled'; then
  log_info "Backing up Minio data ..."
  if ! ghe-backup-minio 1>&3; then
    failures="$failures minio"
  fi
fi

# Build the list of backup steps that can be handed to the parallel runner.
#
# Each element of `commands` is a small shell snippet stored as one string.
# A snippet prints a title and then runs one backup helper; if the helper
# fails, the step name (followed by a space) is appended to $failures_file.
#
# Quoting: the snippets are written inside double quotes, so $cmd_title and
# $failures_file are expanded now, while the list is being built. The escaped
# quotes (\") stay in the snippet and take effect when it is executed.
# The title is captured from log_info's output up front rather than calling
# log_info inside the snippet.
#
# Failures go to a file instead of the `failures` variable -- presumably
# because snippets run by the parallel command execute outside this shell and
# could not modify its variables.
cmd_title=$(log_info "Backing up Redis database ...")
commands=("
echo \"$cmd_title\"
ghe-backup-redis > redis.rdb || printf %s \"redis \" >> \"$failures_file\"")

cmd_title=$(log_info "Backing up audit log ...")
commands+=("
echo \"$cmd_title\"
ghe-backup-es-audit-log || printf %s \"audit-log \" >> \"$failures_file\"")

cmd_title=$(log_info "Backing up Git repositories ...")
commands+=("
echo \"$cmd_title\"
ghe-backup-repositories || printf %s \"repositories \" >> \"$failures_file\"")

# Pages backups are skipped only if GHE_BACKUP_PAGES is explicitly set to 'no' to guarantee backward compatibility.
# If a customer upgrades backup-utils but keeps the config file from a previous version, Pages backups still work as expected.

if [ "$GHE_BACKUP_PAGES" != "no" ]; then
  cmd_title=$(log_info "Backing up GitHub Pages artifacts ...")
  commands+=("
  echo \"$cmd_title\"
  ghe-backup-pages || printf %s \"pages \" >> \"$failures_file\"")
fi

cmd_title=$(log_info "Backing up storage data ...")
commands+=("
echo \"$cmd_title\"
ghe-backup-storage || printf %s \"storage \" >> \"$failures_file\"")

cmd_title=$(log_info "Backing up custom Git hooks ...")
commands+=("
echo \"$cmd_title\"
ghe-backup-git-hooks || printf %s \"git-hooks \" >> \"$failures_file\"")

# Elasticsearch indices are only added for the rsync strategy; the progress
# total is bumped by one to account for the extra step.
if [ "$GHE_BACKUP_STRATEGY" = "rsync" ]; then
  increment-progress-total-count 1
  cmd_title=$(log_info "Backing up Elasticsearch indices ...")
  commands+=("
  echo \"$cmd_title\"
  ghe-backup-es-rsync || printf %s \"elasticsearch \" >> \"$failures_file\"")
fi

# Run the collected snippets: through the configured parallel command when
# parallelism is enabled (each snippet is passed as one argument after `--`),
# otherwise one at a time in this shell via eval.
if [ "$GHE_PARALLEL_ENABLED" = "yes" ]; then
  "$GHE_PARALLEL_COMMAND" "${GHE_PARALLEL_COMMAND_OPTIONS[@]}" -- "${commands[@]}"
else
  for c in "${commands[@]}"; do
    eval "$c"
  done
fi

# Merge the step names recorded in the failures file (if it has any content)
# into the in-memory list of failed steps.
[ ! -s "$failures_file" ] || failures="$failures $(<"$failures_file")"

# Optional post-backup verification of the repositories in this snapshot.
if [ "$GHE_BACKUP_FSCK" = "yes" ]; then
  log_info "Running git fsck on repositories ..."
  if ! ghe-backup-fsck "$GHE_SNAPSHOT_DIR"; then
    failures="$failures fsck"
  fi
fi

# With any failed step the snapshot keeps its 'incomplete' marker and nothing
# is pruned. Otherwise the snapshot is marked complete, the 'current' symlink
# is repointed at it, and expired/failed snapshots are pruned -- unless
# pruning is scheduled to run separately.
if [ -n "$failures" ]; then
  log_info "Skipping pruning snapshots, since some backups failed..."
else
  rm "incomplete"

  rm -f "../current"
  ln -s "$GHE_SNAPSHOT_TIMESTAMP" "../current"

  if [[ $GHE_PRUNING_SCHEDULED == "yes" ]]; then
    log_info "Expired and incomplete snapshots to be pruned separately"
  else
    ghe-prune-snapshots
  fi
fi

# Turn a whitespace-separated list of step names into "a, b, c".
#
# Arguments: $1 - list of step names; leading, trailing and repeated
#                 whitespace is ignored
# Outputs:   the names joined with ", " on stdout (nothing for an empty list)
#
# The failure list is built by appending " name" / "name " fragments, so it
# carries stray spaces. Replacing every space with ", " would therefore emit
# empty entries such as ", secrets, redis, ".
_format_failed_steps() {
  local -a names
  local joined
  # read returns non-zero at end of input when no delimiter is found; the
  # array is still filled, so that status is ignored on purpose.
  read -r -d '' -a names <<<"$1" || true
  [ "${#names[@]}" -gt 0 ] || return 0
  printf -v joined '%s, ' "${names[@]}"
  printf '%s\n' "${joined%, }"
}

END_TIME=$(date +%s)
log_info "Runtime: $((END_TIME - START_TIME)) seconds"
log_info "Completed backup of $GHE_HOSTNAME in snapshot $GHE_SNAPSHOT_TIMESTAMP at $(date +"%H:%M:%S")"

# Exit non-zero and list the steps that failed.
if [ -z "$failures" ]; then
  ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP successfully."
else
  steps=$(_format_failed_steps "$failures")
  ghe_remote_logger "Completed backup from $(hostname) / snapshot $GHE_SNAPSHOT_TIMESTAMP with failures: ${steps}."
  log_error "Error: Snapshot incomplete. Some steps failed: ${steps}. "
  ghe_backup_finished
  exit 1
fi

# Detect if the created backup contains any leaked ssh keys
# (informational only: a non-zero exit status is ignored)
log_info "Checking for leaked ssh keys ..."
ghe-detect-leaked-ssh-keys -s "$GHE_SNAPSHOT_DIR" || true

log_info "Backup of $GHE_HOSTNAME finished."

# Remove in-progress file
ghe_backup_finished
