#!/usr/bin/env bash

# Update a BagIt archive, according to RFC 8493 (and beyond).
#
# Copyright (c) 2021-2026 by Reto Kromer <https://reto.ch/>
#
# This Bash script is released under a 3-Clause BSD License and is provided
# "as is" without warranty or support of any kind.


# initialise constants
VERSION='2025-12-04'
SCRIPT_NAME="$(basename "$0")"
CONFIG_FILE="${HOME}/.config/AVpres/Bash_AVpres/${SCRIPT_NAME}.txt"
# ANSI colour sequences, stored unexpanded and interpreted by 'echo -e'
RED='\033[1;31m'
BLUE='\033[1;34m'
NC='\033[0m'

# load configuration file if any and initialise default values
# A value set by the configuration file wins over the default below.
# The defaults are resolved with the 'command -v' builtin rather than with
# the external 'which' utility, which is not installed everywhere.
[[ -f "${CONFIG_FILE}" ]] && . "${CONFIG_FILE}"
md5="${md5:-$(command -v md5sum)}"
sha1="${sha1:-$(command -v sha1sum)}"
sha256="${sha256:-$(command -v sha256sum)}"
sha512="${sha512:-$(command -v sha512sum)}"
crc32="${crc32:-$(command -v crc32)}"
# the three xxHash variants share one binary and differ only by option
xxh32="${xxh32:-$(command -v xxhsum) -H32}"
xxh64="${xxh64:-$(command -v xxhsum) -H64}"
xxh128="${xxh128:-$(command -v xxhsum) -H128}"

# initialise another default value
# The list is walked in this order when looking for a checksum manifest,
# and the first algorithm having one is used.
declare -a algorithm_list=('sha512' 'md5' 'sha1' 'sha256')
algorithm_list+=('crc32' 'xxh32' 'xxh64' 'xxh128')

# initialise variables
unset algorithm
unset input_path
unset manifest

# enable extended pattern matching features
shopt -s extglob


# print the current UTC date and time as '[YYYY-MM-DD hh:mm:ss UTC]',
# the prefix used for the entries of the log file
date_time() {
  TZ='UTC' date '+[%Y-%m-%d %H:%M:%S %Z]'
}


# print an error message, write it to the log file and exit with status 1
#   $1  error message (optional); when it is missing or empty, a generic
#       message followed by a bell is used instead
# The message is printed with '%s', so that backslashes it may contain (for
# example in a path given by the user) are shown verbatim instead of being
# interpreted as escape sequences. Only the colour codes are expanded.
# Nothing is logged when 'LOG_FILE' is empty or not set.
abort() {
  local message="${1:-An unknown error occurred.}"
  local bell=''

  # ring the bell only together with the generic message
  [[ -n "${1:-}" ]] || bell='\a'
  printf '%b%s%b%b\n' "${RED}" "${message}" "${bell}" "${NC}"
  if [[ -n "${LOG_FILE:-}" ]]; then
    {
      printf '%s %s\n' "$(date_time)" "${message}"
      printf '%s END\n' "$(date_time)"
    } >> "${LOG_FILE}"
  fi
  exit 1
}


# print a two-line hint on how to get the help, log the call and exit with
# status 1
print_prompt() {
  printf '%s print prompt\n' "$(date_time)" >> "${LOG_FILE}"
  printf 'Help:\n  %s -h\n' "${SCRIPT_NAME}"
  printf '%s END\n' "$(date_time)" >> "${LOG_FILE}"
  exit 1
}


# print the usage, the options, a note telling whether a local configuration
# file has been found, and the version; log the call and exit with status 0
print_help() {
  local config_note='      (no local configuration file found on this computer)'

  printf '%s print help\n' "$(date_time)" >> "${LOG_FILE}"
  if [[ -f "${CONFIG_FILE}" ]]; then
    config_note='      (local configuration file found and loaded)'
  fi
  # the note belongs to the '-x' option and is therefore aligned with its text
  cat << EOF

Usage:
  ${SCRIPT_NAME} (-i|-b) <input_folder>
  ${SCRIPT_NAME} -h | -x
Options:
  -i  BagIt archive folder
  -b  is an alias of -i
  -h  this help
  -x  advanced options with their default arguments
${config_note}
Default dependency:
  one of sha512sum, md5sum, sha1sum, sha256sum, xxhsum or crc32
See also:
  man ${SCRIPT_NAME}
  https://avpres.net/Bash_AVpres/
About:
  Abstract: Update a BagIt archive, according to RFC 8493
  Version:  ${VERSION}

EOF
  printf '%s END\n' "$(date_time)" >> "${LOG_FILE}"
  exit 0
}


# tell whether a local configuration file has been found, list every advanced
# option together with its current default argument, log the call and exit
# with status 0
print_options() {
  local option

  printf '%s print options\n' "$(date_time)" >> "${LOG_FILE}"
  if [[ ! -f "${CONFIG_FILE}" ]]; then
    printf "\nNo local configuration file for '%s' found on this computer.\n" \
      "${SCRIPT_NAME}"
  else
    printf "\nLocal configuration file\n  '%s'\nfound and loaded.\n" \
      "${CONFIG_FILE}"
  fi
  printf '\nAdvanced options with their default arguments:\n'
  # each option carries the name of the variable holding its argument
  for option in md5 sha1 sha256 sha512 crc32 xxh32 xxh64 xxh128; do
    printf "  --%s='%s'\n" "${option}" "${!option}"
  done
  printf '\n'
  printf '%s END\n' "$(date_time)" >> "${LOG_FILE}"
  exit 0
}


# check that provided input folder is valid and normalise its path
# check that 'bagit.txt' file is present
#   reads and rewrites the global 'input_path'
#   aborts when no folder is given, when it is not a directory or when it
#   does not contain a 'bagit.txt' file
# Every form of path ('bag', './bag', 'a/b', 'bag/', '/x/bag/') is turned
# into an absolute path without trailing slash.
check_input() {
  local target
  local resolved

  if [[ -z "${input_path:-}" ]]; then
    abort "Error: No input folder provided via '-i', '-b', '--input' or '--bagit'."
  elif [[ ! -d "${input_path}" ]]; then
    abort "Error: '${input_path}' is not a directory."
  fi

  # a leading './' keeps 'cd' from treating the name as an option or as '-',
  # and from searching CDPATH
  target="${input_path}"
  [[ "${target}" == /* ]] || target="./${target}"
  resolved="$(unset CDPATH; cd "${target}" 2> /dev/null && pwd)"
  if [[ -n "${resolved}" ]]; then
    input_path="${resolved}"
  elif [[ "${input_path}" != /* ]]; then
    # the folder cannot be entered: at least make the path absolute
    input_path="${PWD}/${input_path}"
  fi

  if [[ ! -f "${input_path}/bagit.txt" ]]; then
    abort "Error: The 'bagit.txt' file is missing."
  fi
}


# find algorithm and check that its command is available
#   reads   'input_path', 'algorithm_list' and the variable named after the
#           algorithm found (e.g. 'md5'), which holds the command to run
#   writes  'algorithm' (first entry of 'algorithm_list' for which a
#           'manifest-<algorithm>.txt' file exists) and 'manifest' (its path)
#   aborts  when no manifest is found, or when the command of the algorithm
#           is empty or cannot be found
check_algorithm() {
  local algo
  local -a command_words

  for algo in "${algorithm_list[@]}"; do
    if [[ -f "${input_path}/manifest-${algo}.txt" ]]; then
      algorithm="${algo}"
      break
    fi
  done
  if [[ -z "${algorithm:-}" ]]; then
    abort "Error: No 'manifest-<algorithm>.txt' file found."
  fi
  manifest="${input_path}/manifest-${algorithm}.txt"

  # The command may carry options (e.g. 'xxhsum -H64'), therefore only its
  # first word is looked up. An empty command must be rejected explicitly,
  # because 'command -v' without operand reports success.
  read -r -a command_words <<< "${!algorithm}"
  if (( ${#command_words[@]} == 0 )) \
    || ! command -v -- "${command_words[0]}" &> /dev/null; then
    abort "Error: '${algorithm}' is failing."
  fi
}


# list the payload files of a bag, one 'data/...' path per line, sorted
# bytewise; files in subfolders and hidden files are included
#   $1  bag folder
_update_bagit_payload() {
  local bag="$1"
  local file

  while IFS= read -r -d '' file; do
    printf '%s\n' "${file#"${bag}"/}"
  done < <(find "${bag}/data" -type f -print0) | LC_ALL=C sort
}


# list the paths recorded in a checksum manifest, one per line, sorted
# bytewise; everything after the first run of blanks is the path, so that
# file names containing spaces are kept whole
#   $1  manifest file
_update_bagit_recorded() {
  awk '{
    path = $0
    if (sub(/^[^ \t]*[ \t]+/, "", path) && path != "") print path
  }' "$1" | LC_ALL=C sort
}


# replace the checksum of one entry of a tag manifest; a tag manifest without
# an entry of that name is left unchanged
#   $1  tag manifest file
#   $2  name of the entry, compared as a literal string
#   $3  new checksum
_update_bagit_tag() {
  local tag_manifest="$1"
  local name="$2"
  local checksum="$3"
  local tmp_file

  tmp_file="$(mktemp "${tag_manifest}.XXXXXX")" || return 1
  if awk -v name="${name}" -v checksum="${checksum}" '{
    path = $0
    found = sub(/^[^ \t]*[ \t]+/, "", path)
    sub(/[ \t\r]+$/, "", path)
    if (found && path == name) print checksum "  " name
    else print
  }' "${tag_manifest}" > "${tmp_file}"; then
    # write back in place, in order to keep the permissions of the file
    cat "${tmp_file}" > "${tag_manifest}"
  fi
  rm -f "${tmp_file}"
}


# update BagIt archive
#   reads  'input_path', 'manifest', 'algorithm' and the variable named after
#          the algorithm, which holds the checksum command
# The payload files found under 'data' are compared with the paths recorded
# in the checksum manifest:
#   - differences on both sides: abort, nothing is modified
#   - no difference: print a note and exit with status 0
#   - files without checksum: compute and append their checksums
#   - checksums without file: remove these lines from the manifest
# Afterwards the 'Payload-Oxum' of 'bag-info.txt' and the checksums of
# 'bag-info.txt' and of the manifest in 'tagmanifest-<algorithm>.txt' are
# refreshed, as far as these files and entries exist.
update_bagit() {
  local only_in_folder
  local only_in_manifest
  local line
  local checksum
  local file
  local bytes
  local size=0
  local number=0
  local tmp_file
  local tag_manifest

  echo -e "${BLUE}Please wait while updating checksum manifest...${NC}"
  echo "$(date_time) updating checksum manifest" >> "${LOG_FILE}"

  # without payload folder every checksum would be considered superfluous
  if [[ ! -d "${input_path}/data" ]]; then
    abort "Error: The 'data' folder is missing."
  fi

  # 'comm' needs both lists sorted with the same collation
  # elements which are only in the folder
  only_in_folder="$(LC_ALL=C comm -23 \
    <(_update_bagit_payload "${input_path}") \
    <(_update_bagit_recorded "${manifest}"))"
  # elements which are only in the manifest
  only_in_manifest="$(LC_ALL=C comm -13 \
    <(_update_bagit_payload "${input_path}") \
    <(_update_bagit_recorded "${manifest}"))"

  # update 'manifest-<algorithm>.txt' checksum manifest file
  if [[ -n "${only_in_folder}" && -n "${only_in_manifest}" ]]; then
    abort "Error: Both folder and checksum manifest have been modified."
  elif [[ -z "${only_in_folder}" && -z "${only_in_manifest}" ]]; then
    echo -e "${BLUE}The number of files equals the number of checksums.${NC}"
    echo "The number of files equals the number of checksums." >> "${LOG_FILE}"
    echo "$(date_time) END" >> "${LOG_FILE}"
    exit 0
  elif [[ -n "${only_in_folder}" ]]; then

    # make sure that the first new line does not get glued to the last one
    if [[ -s "${manifest}" && -n "$(tail -c 1 "${manifest}")" ]]; then
      echo >> "${manifest}"
    fi

    # compute checksum and add it to manifest
    while IFS= read -r line; do
      # the command may carry options and is split into words on purpose
      # shellcheck disable=SC2086
      checksum="$(${!algorithm} "${input_path}/${line}" | awk '{print $1}')"
      # some tools mark an escaped file name with a leading backslash
      checksum="${checksum#\\}"
      [[ -n "${checksum}" ]] || abort "Error: '${algorithm}' is failing."
      # the line is built here, independently of the output layout of the tool
      printf '%s  %s\n' "${checksum}" "${line}" >> "${manifest}"
    done <<< "${only_in_folder}"

    echo -e "${BLUE}The missing checksums have been added to the manifest.${NC}"
    echo "The missing checksums have been added to the manifest." >> "${LOG_FILE}"
  else

    # delete checksum from manifest
    # only lines whose path is exactly one of the superfluous paths are
    # dropped; the list of these paths is read first, from standard input
    tmp_file="$(mktemp "${manifest}.XXXXXX")" \
      || abort "Error: Cannot create a temporary file."
    if awk '
      NR == FNR { superfluous[$0] = 1; next }
      {
        path = $0
        if (sub(/^[^ \t]*[ \t]+/, "", path) && (path in superfluous)) next
        print
      }
    ' - "${manifest}" <<< "${only_in_manifest}" > "${tmp_file}"; then
      # write back in place, in order to keep the permissions of the file
      cat "${tmp_file}" > "${manifest}"
      rm -f "${tmp_file}"
    else
      rm -f "${tmp_file}"
      abort "Error: Cannot update '${manifest}'."
    fi

    echo -e "${BLUE}The superfluous checksums have been deleted from the manifest.${NC}"
    echo "The superfluous checksums have been deleted from the manifest." >> "${LOG_FILE}"
  fi

  # update 'bag-info.txt' file
  # Payload-Oxum is '<total size in bytes>.<number of payload files>'
  if [[ -f "${input_path}/bag-info.txt" ]] \
    && grep -q 'Payload-Oxum' "${input_path}/bag-info.txt"; then
    while IFS= read -r -d '' file; do
      bytes="$(wc -c < "${file}")"
      size=$(( size + ${bytes:-0} ))
      number=$(( number + 1 ))
    done < <(find "${input_path}/data" -type f -print0)
    sed -i.bak "s/^\(Payload-Oxum:\).*/\1 ${size}.${number}/" "${input_path}/bag-info.txt"
    rm -f "${input_path}/bag-info.txt.bak"
  fi

  # update 'tagmanifest-<algorithm>.txt' manifest file
  tag_manifest="${input_path}/tagmanifest-${algorithm}.txt"
  if [[ -f "${tag_manifest}" ]]; then
    for line in 'bag-info.txt' "manifest-${algorithm}.txt"; do
      [[ -f "${input_path}/${line}" ]] || continue
      # shellcheck disable=SC2086
      checksum="$(${!algorithm} "${input_path}/${line}" | awk '{print $1}')"
      checksum="${checksum#\\}"
      [[ -n "${checksum}" ]] || abort "Error: '${algorithm}' is failing."
      _update_bagit_tag "${tag_manifest}" "${line}" "${checksum}"
    done
    echo -e "${BLUE}'manifest-${algorithm}.txt' has been updated.${NC}"
    echo "'manifest-${algorithm}.txt' has been updated." >> "${LOG_FILE}"
  fi
}


# check that Bash 3.2 or later is running
# 'BASH_VERSINFO' describes the shell which is executing this script; no
# external command is needed for the comparison.
if (( BASH_VERSINFO[0] < 3 \
  || ( BASH_VERSINFO[0] == 3 && BASH_VERSINFO[1] < 2 ) )); then
  echo -e "${BLUE}Warning: This 'bash' binary is very old. Tested for version 3.2 or later.${NC}"
fi

# start log file
# When the log file cannot be created, the entries are discarded, so that
# the redirections used throughout the script keep working.
[[ -d '/tmp/AVpres' ]] || mkdir -p '/tmp/AVpres'
LOG_FILE="$(mktemp -q "/tmp/AVpres/${SCRIPT_NAME}.XXXXXXXXXX")"
[[ -n "${LOG_FILE}" && -w "${LOG_FILE}" ]] || LOG_FILE='/dev/null'
echo "$(date_time) ${SCRIPT_NAME} ${VERSION}" > "${LOG_FILE}"
echo "$(date_time) $0 $*" >> "${LOG_FILE}"
echo "$(date_time) START" >> "${LOG_FILE}"

# parse and process provided input
# The leading ':' of the option string selects the silent error reporting of
# 'getopts'. The '-:' entry makes 'getopts' deliver a long option such as
# '--input=<folder>' as option '-' with 'input=<folder>' in OPTARG.
(( $# == 0 )) && print_prompt
while getopts ":b:i:-:hx" opt; do
  case "${opt}" in
    # an argument starting with '-' is another option which has been
    # swallowed, e.g. in '-b -h'
    b) if [[ "${OPTARG:0:1}" == '-' ]]; then
         abort "Error: The parameter '-b' requires an argument."
       else
         input_path="${OPTARG}"
       fi ;;
    i) if [[ "${OPTARG:0:1}" == '-' ]]; then
         abort "Error: The option '-i' requires an argument."
       else
         input_path="${OPTARG}"
       fi ;;
    # long options; '=?*' requires a non-empty argument
    -) case "${OPTARG}" in
         bagit=?*) input_path="${OPTARG#*=}" ;;
         input=?*) input_path="${OPTARG#*=}" ;;
         md5=?*) md5="${OPTARG#*=}" ;;
         sha1=?*) sha1="${OPTARG#*=}" ;;
         sha256=?*) sha256="${OPTARG#*=}" ;;
         sha512=?*) sha512="${OPTARG#*=}" ;;
         crc32=?*) crc32="${OPTARG#*=}" ;;
         xxh32=?*) xxh32="${OPTARG#*=}" ;;
         xxh64=?*) xxh64="${OPTARG#*=}" ;;
         xxh128=?*) xxh128="${OPTARG#*=}" ;;
         help) print_help ;;
         options) print_options ;;
         *) abort "Error: The option '--${OPTARG}' is not valid." ;;
       esac ;;
    h) print_help ;;
    x) print_options ;;
    :) abort "Error: The option '-${OPTARG}' requires an argument." ;;
    *) abort "Error: The option '-${OPTARG}' is not valid." ;;
  esac
done

# check input folder
check_input

# find algorithm and check that command is running
check_algorithm

# update BagIt archive
update_bagit

# end log file
echo "$(date_time) END" >> "${LOG_FILE}"
