#!/usr/bin/env bash

# Recursively create a CSV file with metadata of all AV files in a folder.
# The main container, video and audio codec data are extracted.
#
# Copyright (c) 2022-2026 by Reto Kromer <https://reto.ch/>
#
# This Bash script is released under a 3-Clause BSD License and is provided
# "as is" without warranty or support of any kind.


# initialise constants
VERSION='2026-01-25'
SCRIPT_NAME="$(basename "$0")"
CONFIG_FILE="${HOME}/.config/AVpres/Bash_AVpres/${SCRIPT_NAME}.txt"
# ANSI colour escape sequences (stored unexpanded, interpreted when printed)
RED='\033[1;31m'
BLUE='\033[1;34m'
NC='\033[0m'

# load configuration file if any and initialise default values; a value
# already set by the configuration file wins over the built-in default
[[ -f "${CONFIG_FILE}" ]] && . "${CONFIG_FILE}"
verbosity="${verbosity:-count}"
output_file="${output_file:-${HOME}/Desktop/report_$(date +'%F').csv}"
extension_regex="${extension_regex:-^(txt|md5|xml|pdf|xlsx|xls|docx|doc|zip|gz)$}"
# 'command -v' is a shell builtin, whereas 'which' is an external tool that
# is not installed everywhere and whose output differs between platforms
ffprobe_bin="${ffprobe_bin:-$(command -v ffprobe)}"

# initialise another default value: the accepted arguments of '--verbosity'
verbosity_regex='^(path|filename|count|off)$'

# initialise variables: make sure that nothing is inherited from the
# environment or from the configuration file
unset input_path
unset container
unset video_codec
unset audio_codec


# print the current date and time in UTC, wrapped in square brackets,
# e.g. '[2026-01-25 13:37:00 UTC]' (used as prefix of every log line)
date_time() {
  date -u '+[%Y-%m-%d %H:%M:%S %Z]'
}

# print an error message in red, record it in the log file, close the log
# and exit with status 1
#   $1  error message (optional: when missing or empty, a generic message
#       followed by a terminal bell is printed instead)
abort() {
  local message="${1:-An unknown error occurred.}"
  local bell=''

  # only the generic fallback message rings the bell
  if [[ "${1}" == '' ]]; then
    bell='\a'
  fi

  # '%b' expands the escape sequences of the colour constants and of the
  # bell, while '%s' prints the message verbatim: a message often embeds a
  # user-provided path, and its backslashes must not be interpreted
  printf '%b%s%b%b\n' "${RED}" "${message}" "${bell}" "${NC}"
  echo "$(date_time) ${message}" >> "${LOG_FILE}"
  echo "$(date_time) END" >> "${LOG_FILE}"
  exit 1
}


# show a two-line hint pointing to the '-h' option, log the event, close
# the log and exit with status 1
print_prompt() {
  echo "$(date_time) print prompt" >> "${LOG_FILE}"
  printf '%s\n' \
    'Help:' \
    "  ${SCRIPT_NAME} -h"
  echo "$(date_time) END" >> "${LOG_FILE}"
  exit 1
}


# show the full usage text, log the event, close the log and exit with
# status 0; the text also tells whether a local configuration file exists
print_help() {
  # state of the local configuration file, shown below the '-x' option
  local config_state='no local configuration file found on this computer'
  [[ -f "${CONFIG_FILE}" ]] \
    && config_state='local configuration file found and loaded'

  echo "$(date_time) print help" >> "${LOG_FILE}"
  cat << EOF

Usage:
  ${SCRIPT_NAME} -i <input_folder> [-o <output_file>]
  ${SCRIPT_NAME} -h
Options:
  -i  input folder to be processed recursively
  -o  output file for the comma-separated values
      (default is '${output_file}')
  -h  this help
  -x  advanced options with their default arguments
      (${config_state})
Dependency:
  ffprobe
See also:
  man ${SCRIPT_NAME}
  https://avpres.net/Bash_AVpres/
About:
  Abstract: Recursively create a CSV file with metadata of all AV files in a
            folder
  Version:  ${VERSION}

EOF
  echo "$(date_time) END" >> "${LOG_FILE}"
  exit 0
}


# show the advanced options together with their current default arguments,
# preceded by a note on the local configuration file; log the event, close
# the log and exit with status 0
print_options() {
  echo "$(date_time) print parameters" >> "${LOG_FILE}"

  # first paragraph: was a local configuration file found?
  if [[ ! -f "${CONFIG_FILE}" ]]; then
    printf '\n%s\n' \
      "No local configuration file for '${SCRIPT_NAME}' found on this computer."
  else
    printf '\n%s\n%s\n%s\n' \
      'Local configuration file' \
      "  '${CONFIG_FILE}'" \
      'found and loaded.'
  fi

  # second paragraph: the options and the values currently in effect
  cat << EOF

Advanced options with their default arguments:
  --verbosity='${verbosity}'
  --output='${output_file}'
  --exclusion='${extension_regex}'

FFmpeg default binary:
  --ffprobe='${ffprobe_bin}'

EOF
  echo "$(date_time) END" >> "${LOG_FILE}"
  exit 0
}


# make sure that a command can be found by the shell, otherwise abort
#   $1  name of, or path to, the command
# Note that the command is only looked up, not executed.
check_command() {
  command -v "${1}" > /dev/null 2>&1 \
    || abort "Error: '${1}' binary not found."

  echo "$(date_time) '${1}' is running" >> "${LOG_FILE}"
  return 0
}


# verify that the provided input folder is valid and store its absolute path
#   $1  path to the input folder
# Globals:
#   input_path  (written) set to the argument, made absolute if it is relative
#   LOG_FILE    (read)
# Aborts if the argument is empty or is not a directory.
verify_input() {
  echo "$(date_time) verify input" >> "${LOG_FILE}"
  if [[ "${1}" == '' ]]; then
    abort "Error: No input folder provided."
  elif [[ ! -d "${1}" ]]; then
    abort "Error: '${1}' is not a directory."
  fi

  # take the value from the argument rather than relying on the caller having
  # set the global, and anchor every relative path (with or without a slash)
  # to the current working directory
  input_path="${1}"
  if [[ "${input_path}" != /* ]]; then
    input_path="${PWD}/${input_path}"
  fi
  echo "$(date_time) input_path='${input_path}'" >> "${LOG_FILE}"

  return 0
}


# verify that the provided output file can be created, store its absolute
# path and write the CSV header into it (an existing file is overwritten)
#   $1  path to the output file
# Globals:
#   output_file  (written) set to the argument, made absolute if relative
#   LOG_FILE     (read)
# Aborts if the argument is empty or if the file cannot be created.
verify_output() {
  local header

  echo "$(date_time) verify output" >> "${LOG_FILE}"
  if [[ "${1}" == '' ]]; then
    abort "Error: Path to output file cannot be empty."
  fi

  # every relative path must be anchored to the current working directory,
  # including those containing a slash such as './report.csv': the working
  # directory may change before further lines are appended to the file
  output_file="${1}"
  if [[ "${output_file}" != /* ]]; then
    output_file="${PWD}/${output_file}"
  fi
  if ! touch "${output_file}" &> /dev/null; then
    abort "Error: Cannot create an output file '${output_file}'."
  fi
  echo "$(date_time) output_file='${output_file}'" >> "${LOG_FILE}"

  # write CSV header
  # metadata from container
  header="path,filename,extension,duration,size,major brand"
  # metadata from video codec
  header+=",video codec,profile,width,height,SAR,DAR,pixel format,field order,"
  header+="frame rate"
  # metadata from audio codec
  header+=",audio codec,sample format,sample rate,channels,channel layout"
  echo "${header}" > "${output_file}"

  return 0
}


# verify verbosity
#   $1  verbosity to be checked against 'verbosity_regex'
# Globals:
#   verbosity  (written) reset to 'count' when the argument is not valid,
#              left untouched otherwise
# An invalid verbosity is not an error: a warning is printed instead.
verify_verbosity() {
  echo "$(date_time) verify verbosity" >> "${LOG_FILE}"
  if [[ ! "${1}" =~ ${verbosity_regex} ]]; then
    # '%b' expands the colour escape sequences and '%s' prints the
    # user-provided value verbatim; the colour is reset at the end of the
    # line so that the following terminal output is not tinted
    printf "%bWarning: '%s' is not a valid verbosity. Using 'count'.%b\n" \
      "${BLUE}" "${1}" "${NC}"
    verbosity='count'
  fi
  echo "$(date_time) verbosity='${verbosity}'" >> "${LOG_FILE}"

  return 0
}


# extract metadata from container
#   $1  key to look up, e.g. 'format.duration=*' (a trailing '=*', '=' or
#       nothing at all after the key name are all accepted)
# Globals:
#   container  (read) text made of 'key=value' lines
# Prints the value of every line whose key is exactly the requested one.
ex_c() {
  local key

  if [[ "${1}" == '' ]]; then
    abort "Internal error: No item provided for container metadata extraction."
  else
    # reduce the argument to the bare key name
    key="${1%\*}"
    key="${key%=}"
    # the key is compared literally at the start of the line (in a regular
    # expression '.' would match any character), and everything after the
    # first '=' is printed, so that a value containing '=' stays complete
    printf '%s\n' "${container}" \
      | awk -v key="${key}" \
        'index($0, key "=") == 1 { print substr($0, length(key) + 2) }'
  fi
}


# extract metadata from video codec
#   $1  key to look up, e.g. 'streams.stream.0.width=*' (a trailing '=*',
#       '=' or nothing at all after the key name are all accepted)
# Globals:
#   video_codec  (read) text made of 'key=value' lines
# Prints the value of every line whose key is exactly the requested one.
ex_v() {
  local key

  if [[ "${1}" == '' ]]; then
    abort "Internal error: No item provided for video codec metadata extraction."
  else
    # reduce the argument to the bare key name
    key="${1%\*}"
    key="${key%=}"
    # the key is compared literally at the start of the line (in a regular
    # expression '.' would match any character), and everything after the
    # first '=' is printed, so that a value containing '=' stays complete
    printf '%s\n' "${video_codec}" \
      | awk -v key="${key}" \
        'index($0, key "=") == 1 { print substr($0, length(key) + 2) }'
  fi
}

# extract metadata from audio codec
#   $1  key to look up, e.g. 'streams.stream.0.channels=*' (a trailing '=*',
#       '=' or nothing at all after the key name are all accepted)
# Globals:
#   audio_codec  (read) text made of 'key=value' lines
# Prints the value of every line whose key is exactly the requested one.
ex_a() {
  local key

  if [[ "${1}" == '' ]]; then
    abort "Internal error: No item provided for audio codec metadata extraction."
  else
    # reduce the argument to the bare key name
    key="${1%\*}"
    key="${key%=}"
    # the key is compared literally at the start of the line (in a regular
    # expression '.' would match any character), and everything after the
    # first '=' is printed, so that a value containing '=' stays complete
    printf '%s\n' "${audio_codec}" \
      | awk -v key="${key}" \
        'index($0, key "=") == 1 { print substr($0, length(key) + 2) }'
  fi
}


# quote a value for use as a CSV field (private helper of generate_CVS_file)
#   $1  raw value
# The value is printed unchanged unless it contains a comma, a double quote
# or a line break; in that case it is wrapped in double quotes and every
# embedded double quote is doubled.
_csv_field() {
  local value="${1}"
  local quote='"'

  if [[ "${value}" == *,* || "${value}" == *"${quote}"* \
    || "${value}" == *$'\n'* || "${value}" == *$'\r'* ]]; then
    value="${value//${quote}/${quote}${quote}}"
    printf '"%s"' "${value}"
  else
    printf '%s' "${value}"
  fi
}


# recursively create a CSV file with metadata of all AV files in a folder
#   $1  input folder (the function changes the working directory to it)
#   $2  output file, to which one line per AV file is appended; it must be
#       an absolute path because of the change of working directory
# Globals:
#   verbosity, extension_regex, ffprobe_bin, LOG_FILE  (read)
#   container, video_codec, audio_codec  (written) ffprobe output of the
#       file currently processed, read by ex_c, ex_v and ex_a
# A file is skipped when its extension matches 'extension_regex' or when
# neither a video nor an audio codec name is reported for it.
generate_CVS_file() {
  local in_folder="${1}"
  local out_file="${2}"
  local tmp_file
  local in_file
  local file_name
  local stem
  local extension
  local folder
  local line
  local -a param_c=(-show_format -print_format flat -v quiet)
  local -a param_v=(-show_streams -select_streams v -print_format flat -v quiet)
  local -a param_a=(-show_streams -select_streams a -print_format flat -v quiet)
  local all_files=0
  local AV_files=0

  echo -e "${BLUE}Please wait while extracting the metadata...${NC}"
  echo "$(date_time) generating metadata list" >> "${LOG_FILE}"
  cd "${in_folder}" || abort "Error: 'cd ${in_folder}' is failing."

  # The NUL-delimited list is read as is, so that any file name survives
  # (leading or trailing blanks, backslashes, line breaks). The loop is fed
  # by a process substitution rather than by a pipeline: it therefore runs
  # in the current shell and the counters are still set after 'done',
  # whatever the Bash version. File descriptor 3 keeps the list away from
  # the stdin of the commands called in the loop body.
  while IFS= read -r -d '' -u 3 tmp_file; do
    in_file="${tmp_file#./}"

    # split off the filename first, then its extension: a dot in a folder
    # name must influence neither of them
    file_name="${in_file##*/}"
    if [[ "${file_name}" == *.* ]]; then
      stem="${file_name%.*}"
      extension="${file_name##*.}"
    else
      stem="${file_name}"
      extension=''
    fi

    all_files=$((all_files + 1))
    if [[ "${verbosity}" == 'count' ]]; then
      printf '\r%d files analysed' "${all_files}"
    elif [[ "${verbosity}" == 'filename' ]]; then
      printf '> %s\n' "${file_name}"
    elif [[ "${verbosity}" == 'path' ]]; then
      printf '> %s\n' "${in_file}"
    fi

    # ignore files with an excluded extension, before spending time on them
    if [[ "${extension}" =~ ${extension_regex} ]]; then
      echo "$(date_time) - ${in_file}" >> "${LOG_FILE}"
      continue
    fi

    # the path is passed with its leading './', so that a name starting with
    # a dash cannot be mistaken for an option
    container="$("${ffprobe_bin}" "${param_c[@]}" "${tmp_file}")"
    video_codec="$("${ffprobe_bin}" "${param_v[@]}" "${tmp_file}")"
    audio_codec="$("${ffprobe_bin}" "${param_a[@]}" "${tmp_file}")"

    # ignore files without AV content
    if [[ "$(ex_v 'streams.stream.0.codec_name=*')" == '' \
      && "$(ex_a 'streams.stream.0.codec_name=*')" == '' ]]; then
      echo "$(date_time) - ${in_file}" >> "${LOG_FILE}"
      continue
    fi

    # path, filename and extension
    echo "$(date_time) + ${in_file}" >> "${LOG_FILE}"
    if [[ "${in_file}" == */* ]]; then
      folder="${in_file%/*}"
    else
      folder=''
    fi
    line="$(_csv_field "${folder}")"
    line+=",$(_csv_field "${stem}")"
    line+=",$(_csv_field "${extension}")"

    # extract container metadata
    line+=",$(ex_c 'format.duration=*')"
    line+=",$(ex_c 'format.size=*')"
    line+=",$(ex_c 'format.tags.major_brand=*')"

    # extract video codec metadata
    line+=",$(ex_v 'streams.stream.0.codec_name=*')"
    line+=",$(ex_v 'streams.stream.0.profile=*')"
    line+=",\"$(ex_v 'streams.stream.0.width=*')\""
    line+=",\"$(ex_v 'streams.stream.0.height=*')\""
    line+=",$(ex_v 'streams.stream.0.sample_aspect_ratio=*')"
    line+=",$(ex_v 'streams.stream.0.display_aspect_ratio=*')"
    line+=",$(ex_v 'streams.stream.0.pix_fmt=*')"
    line+=",$(ex_v 'streams.stream.0.field_order=*')"
    line+=",$(ex_v 'streams.stream.0.r_frame_rate=*')"

    # extract audio codec metadata
    line+=",$(ex_a 'streams.stream.0.codec_name=*')"
    line+=",$(ex_a 'streams.stream.0.sample_fmt=*')"
    line+=",$(ex_a 'streams.stream.0.sample_rate=*')"
    line+=",\"$(ex_a 'streams.stream.0.channels=*')\""
    line+=",$(ex_a 'streams.stream.0.channel_layout=*')"

    # write metadata line to CSV file
    printf '%s\n' "${line}" >> "${out_file}"
    AV_files=$((AV_files + 1))
  done 3< <(find . -type f -print0)

  if [[ "${verbosity}" == 'count' ]]; then
    printf '\r'
  fi
  echo -e "${BLUE}${all_files} files analysed"
  echo -e "${AV_files} AV files found and the report is:${NC}"
  echo -e "  ${out_file}"
  echo "$(date_time) ${all_files} files analysed" >> "${LOG_FILE}"
  echo "$(date_time) ${AV_files} AV files found" >> "${LOG_FILE}"
  return 0
}


# start log file; logging is best effort: if no temporary file can be
# created, the log lines are discarded instead of failing on every write
[[ -d '/tmp/AVpres' ]] || mkdir -p '/tmp/AVpres'
LOG_FILE="$(mktemp "/tmp/AVpres/${SCRIPT_NAME}.XXXXXXXXXX")" \
  || LOG_FILE='/dev/null'
echo "$(date_time) ${SCRIPT_NAME} ${VERSION}" > "${LOG_FILE}"
echo "$(date_time) $0 $*" >> "${LOG_FILE}"
echo "$(date_time) START" >> "${LOG_FILE}"

# check if Bash 4.3 or later is running; the version of the interpreter
# actually executing this script is examined, which is not necessarily the
# first 'bash' binary found in PATH
bash_version="${BASH_VERSION}"
echo "$(date_time) running bash version = '${bash_version}'" >> "${LOG_FILE}"
if (( BASH_VERSINFO[0] > 4 \
  || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3) )); then
  # run the last command of a pipeline in the current shell, so that the
  # variables set by a trailing 'while read' loop remain visible afterwards
  shopt -s lastpipe
else
  echo -en "${BLUE}Warning: This 'bash' binary is very old. "
  echo -e "Version 4.3 or later is preferred and the current 5.3 recommended.${NC}"
fi

# parse and process provided input: without any argument only the short
# prompt is shown
if [[ $# -eq 0 ]]; then
  print_prompt
fi

# The leading ':' of the option string selects the silent error reporting of
# 'getopts'. The entry '-:' declares '-' as an option taking an argument, so
# that a long option '--name=value' shows up as option '-' with 'name=value'
# in OPTARG.
while getopts ":i:o:-:hx" opt; do
  case "${opt}" in
    h) print_help ;;
    x) print_options ;;
    i)
      # an argument starting with a dash is regarded as the next option
      if [[ "${OPTARG}" != -* ]]; then
        input_path="${OPTARG}"
      else
        abort "Error: The option '-i' requires an argument."
      fi
      ;;
    o)
      if [[ "${OPTARG}" != -* ]]; then
        output_file="${OPTARG}"
      else
        abort "Error: The option '-o' requires an argument."
      fi
      ;;
    -)
      # long options; those taking a value require a non-empty one
      case "${OPTARG}" in
        help) print_help ;;
        options) print_options ;;
        input=?*) input_path="${OPTARG#input=}" ;;
        output=?*) output_file="${OPTARG#output=}" ;;
        verbosity=?*) verbosity="${OPTARG#verbosity=}" ;;
        exclusion=?*) extension_regex="${OPTARG#exclusion=}" ;;
        ffprobe=?*) ffprobe_bin="${OPTARG#ffprobe=}" ;;
        *) abort "Error: The option '--${OPTARG}' is not valid." ;;
      esac
      ;;
    :) abort "Error: The option '-${OPTARG}' requires an argument." ;;
    *) abort "Error: The option '-${OPTARG}' is not valid." ;;
  esac
done

# make sure that an ffprobe binary is configured and can be found
if [[ -n "${ffprobe_bin}" ]]; then
  check_command "${ffprobe_bin}"
else
  abort "Error: 'ffprobe' binary not found."
fi

# validate, in this order, the input folder, the output file (which also
# receives the CSV header) and the verbosity; the first two abort the script
# on failure
verify_input "${input_path}"
verify_output "${output_file}"
verify_verbosity "${verbosity}"

# walk through the input folder and append one CSV line per AV file
generate_CVS_file "${input_path}" "${output_file}"

# close the log file
echo "$(date_time) END" >> "${LOG_FILE}"
