#!/bin/bash
# Abort on the first unhandled command failure (-e) and on any expansion of
# an unset variable (-u).
set -eu

# Files this script depends on: the first and the last one are sourced for
# their variables, the middle one is handed to the mysql client as an extra
# defaults file.
NGCP_ROLES_FILE="/etc/default/ngcp-roles"
MYSQL_CREDENTIALS="/etc/mysql/sipwise_extra.cnf"
DB_CONF="/etc/default/ngcp-db"

# Print the help text (options and return codes) to stdout.
# Every argument below becomes one output line; '%b' expands backslash
# escapes the same way `echo -e` does.
usage() {
  printf '%b\n' \
    "$0 - check the status of pair database replication." \
    '' \
    'Usage:' \
    '' \
    '    -a  check all replications on the host' \
    '    -h  display this help text and exit' \
    '    -v  set output mode to a descriptive status' \
    "    -l  check the status of the local database instead of pair one (connection '' from db01a)" \
    "    -L  check the status of the local database instead of pair one (connection 'db01b' from db01b)" \
    "    -g  check the status of Geo replication (connection 'rdb01a' on DB node)" \
    "    -G  check the status of Geo replication (connection 'rdb01b' on DB node)" \
    '    -m  max mysql replication lag (default 0s)' \
    '' \
    'Return codes:' \
    '' \
    '     0  no replication errors' \
    '     1  there are replication errors, use -v to have a description of the error' \
    '     2  connection problems with the database, use -v to have a description of the error' \
    '     3  replication lag too big, use -v to have a description of the error' \
    '     4  general script errors, use -v to have a description of the error' \
    'Note: all return codes will be summarised in case of multiple errors'
}


# Report the replication errors recorded in the global ${slave_status} text.
# Globals:  slave_status (read); _Last_Errno, _Last_Error, _Last_SQL_Errno,
#           _Last_SQL_Error, _Last_IO_Errno, _Last_IO_Error (written)
# Outputs:  one "[#<errno>] <message>" line on stderr for every error number
#           that is not "0", in the order generic, SQL, IO
get_mysql_replication_errors() {
  local kind errno_name error_name

  # An error number is the last word of its line; the message is whatever
  # follows the "_Error: " separator.
  _Last_Errno=$(awk '/Last_Errno/ { print $NF }' <<< "${slave_status}")
  _Last_Error=$(awk -F"_Error: " '/Last_Error/ { print $2 }' <<< "${slave_status}")

  _Last_SQL_Errno=$(awk '/Last_SQL_Errno/ { print $NF }' <<< "${slave_status}")
  _Last_SQL_Error=$(awk -F"_Error: " '/Last_SQL_Error/ { print $2 }' <<< "${slave_status}")

  _Last_IO_Errno=$(awk '/Last_IO_Errno/ { print $NF }' <<< "${slave_status}")
  _Last_IO_Error=$(awk -F"_Error: " '/Last_IO_Error/ { print $2 }' <<< "${slave_status}")

  # Walk the three number/message pairs through indirect expansion.
  for kind in '' 'SQL_' 'IO_' ; do
    errno_name="_Last_${kind}Errno"
    error_name="_Last_${kind}Error"
    if [ "${!errno_name}" = "0" ] ; then
      continue
    fi
    echo "[#${!errno_name}] ${!error_name}" >&2
  done
}

# Print a ready-to-paste mysql command that shows the slave status of the
# connection being checked.
# Globals:  HOSTNAME, DB_HOST, DB_PORT, DB_CONNECTION (read)
# Outputs:  a single line on stdout (callers redirect it as needed)
show_hint() {
  printf '[%s] Hint: mysql -h%s -P%s -e "show slave '\''%s'\'' status\\G" \n' \
    "${HOSTNAME}" "${DB_HOST}" "${DB_PORT}" "${DB_CONNECTION}"
}

# Re-run this script once per replication relevant for the host and add up
# the exit statuses of the individual runs.
# Globals:  OUTPUT, MAX_MYSQL_LAG, NGCP_TYPE, NGCP_IS_PROXY,
#           NGCP_IS_GEO_CLUSTER, NGCP_IS_DB (read)
# Returns:  0 when every run succeeded, otherwise the sum of the child exit
#           statuses (capped at 255)
check_all_replications() {
  local rc=0
  # Forward the lag limit so that "-a -m N" applies N to every single check
  # instead of silently falling back to the default in the child runs.
  local -a opts=(-m "${MAX_MYSQL_LAG}")

  if ${OUTPUT} ; then
    opts+=(-v)
  fi

  "$0" "${opts[@]}"      || rc=$((rc+$?))  # check pair  replication sp1<->sp2
  # ${VAR:-} keeps `set -u` from aborting when a role flag is not defined.
  if [[ "${NGCP_TYPE:-}" == 'carrier' && "${NGCP_IS_PROXY:-}" == 'yes' ]] ; then
    "$0" "${opts[@]}" -l || rc=$((rc+$?))  # check local replication db01a<->prx:3308
    "$0" "${opts[@]}" -L || rc=$((rc+$?))  # check local replication db01b<->prx:3308
  fi
  if [[ "${NGCP_TYPE:-}" == 'carrier' && "${NGCP_IS_GEO_CLUSTER:-}" == 'yes' && "${NGCP_IS_DB:-}" == 'yes' ]] ; then
    "$0" "${opts[@]}" -g || rc=$((rc+$?))  # check Geo replication rdb01a->db01
    "$0" "${opts[@]}" -G || rc=$((rc+$?))  # check Geo replication rdb01b->db01
  fi

  # `return` only keeps the low 8 bits, so an unusually large sum (children
  # killed by a signal, command not found, ...) could wrap around to 0.
  if [ "${rc}" -gt 255 ] ; then
    rc=255
  fi
  return "${rc}"
}

# Defaults; the command line options parsed below may override them.
OUTPUT=false
DB_LOCAL=false
DB_CONNECTION=""
HOSTNAME=$(ngcp-hostname)
MAX_MYSQL_LAG=0
CHECK_ALL_REPLICATIONS=false

# Parse the command line options into the globals above.
# Globals:   OUTPUT, DB_LOCAL, DB_CONNECTION, MAX_MYSQL_LAG,
#            CHECK_ALL_REPLICATIONS (written)
# Arguments: the script's command line arguments
# Exits:     0 after printing the help text (-h/--help),
#            4 when -m has no value or a value that is not a non-negative
#            integer
# Arguments that match no known option are ignored.
parse_options() {
  while [ $# -gt 0 ]; do
    case "${1}" in
      -l)
        DB_LOCAL=true
        DB_CONNECTION=""
        ;;
      -L)
        DB_LOCAL=true
        DB_CONNECTION="db01b"
        ;;
      -g)
        DB_CONNECTION="rdb01a"
        ;;
      -G)
        DB_CONNECTION="rdb01b"
        ;;
      -m)
        # A missing value leaves the limit empty so it is rejected after the
        # loop, instead of tripping `set -u` on "$2" with a misleading status.
        MAX_MYSQL_LAG="${2:-}"
        if [ $# -gt 1 ] ; then
          shift
        fi
        ;;
      -v)
        OUTPUT=true
        ;;
      -a)
        CHECK_ALL_REPLICATIONS=true
        ;;
      -h|--help)
        usage
        exit 0
        ;;
    esac

    shift
  done

  # Validate once all options are known, so that -v is honoured regardless of
  # its position. A non-numeric limit would make the later integer comparison
  # fail and the lag check would be skipped without notice.
  if ! [[ "${MAX_MYSQL_LAG}" =~ ^[0-9]+$ ]] ; then
    if ${OUTPUT} ; then
      echo "Error: option -m requires a non-negative integer number of seconds!" >&2
    fi
    exit 4
  fi
}

parse_options "$@"

# Reading information from ngcp-roles
if ! [ -r "${NGCP_ROLES_FILE}" ] ; then
  if ${OUTPUT} ; then
    echo "Error: can't read file ${NGCP_ROLES_FILE} on '${HOSTNAME}'!" >&2
  fi
  exit 4
fi

# shellcheck disable=SC1090
. "${NGCP_ROLES_FILE}"

# ${NGCP_TYPE:-} is required here: with `set -u` a plain expansion of an unset
# variable aborts the script before this check can report the problem.
if [ -z "${NGCP_TYPE:-}" ] ; then
  if ${OUTPUT} ; then
    echo "Error: missing information in ${NGCP_ROLES_FILE} on '${HOSTNAME}', cannot continue!" >&2
  fi
  exit 4
fi

if [ "${NGCP_TYPE}" = "spce" ] ; then
  if ${OUTPUT} ; then
    # Sent to stderr like every other error message of this script.
    echo "Error: service not available on CE version." >&2
  fi
  exit 4
fi

# With -a the individual checks are delegated to child runs of this script;
# pass their summed status on explicitly instead of relying on `set -e`.
if "${CHECK_ALL_REPLICATIONS}"; then
  rc=0
  check_all_replications || rc=$?
  exit "${rc}"
fi

# On a carrier node that is not a proxy the -l/-L request is dropped and the
# pair database is checked instead.
if [[ "${NGCP_TYPE}" == 'carrier' && "${NGCP_IS_PROXY:-}" == 'no' ]] ; then
  DB_LOCAL=false
fi

# MySQL credentials file.
if ! [ -r "${MYSQL_CREDENTIALS}" ] ; then
  if ${OUTPUT} ; then
    echo "Error: can't read file ${MYSQL_CREDENTIALS} on '${HOSTNAME}'!" >&2
  fi
  exit 4
fi


# Reading databases information (ip and port)
if ! [ -r "${DB_CONF}" ] ; then
  if ${OUTPUT} ; then
    echo "Error: can't read file ${DB_CONF} on '${HOSTNAME}'!" >&2
  fi
  exit 4
fi

# shellcheck disable=SC1090
. "${DB_CONF}"

# Pick the local or the pair database endpoint. ${VAR:-} keeps `set -u` from
# aborting on a variable the configuration does not define; the check below
# reports that case as a script error instead.
if ${DB_LOCAL} ; then
  DB_HOST="${LOCAL_DBHOST:-}"
  DB_PORT="${LOCAL_DBPORT:-}"
else
  DB_HOST="${PAIR_DBHOST:-}"
  DB_PORT="${PAIR_DBPORT:-}"
fi

if [ -z "${DB_HOST}" ] || [ -z "${DB_PORT}" ] ; then
  if ${OUTPUT} ; then
    echo "Error: missing information in ${DB_CONF} on '${HOSTNAME}', cannot continue!" >&2
  fi
  exit 4
fi


# Options shared by every mysql client call: credentials file, target host
# and port, batch (-B) and silent (-s) output.
OPTS=(--defaults-extra-file="${MYSQL_CREDENTIALS}" -h"${DB_HOST}" -P"${DB_PORT}" -Bs)


# Probe the connection with a trivial query before asking for the status.
if ! timeout 2 mysql "${OPTS[@]}" -e "select now()" >/dev/null 2>&1 ; then
  if ${OUTPUT} ; then
    echo "Error: failed connection with the database on '${HOSTNAME}'!" >&2
    echo "Is MariaDB running on ${DB_HOST}:${DB_PORT} on '${HOSTNAME}'?" >&2
  fi
  exit 2
fi


# Fetch the status of the requested connection. A failing or timed out query
# is reported as a database problem (2) instead of letting `set -e` abort the
# script with whatever status mysql/timeout returned.
if ! slave_status=$(timeout 2 mysql "${OPTS[@]}" -e "show slave '${DB_CONNECTION}' status\G") ; then
  if ${OUTPUT} ; then
    echo "Error: failed to query the replication status on ${DB_HOST}:${DB_PORT} on '${HOSTNAME}'!" >&2
    show_hint >&2
  fi
  exit 2
fi

# Each value is the last word of its "Name: value" line.
_Master_Host=$(awk '/Master_Host:/ { print $NF }' <<< "${slave_status}")
_Slave_IO_Running=$(awk '/Slave_IO_Running:/ { print $NF }' <<< "${slave_status}")
_Slave_SQL_Running=$(awk '/Slave_SQL_Running:/ { print $NF }' <<< "${slave_status}")
_Seconds_Behind_Master=$(awk '/Seconds_Behind_Master:/ { print $NF }' <<< "${slave_status}")

if [ -z "${_Slave_IO_Running}" ] || [ -z "${_Slave_SQL_Running}" ] ; then
  # No thread state at all: the status output was empty or incomplete.
  if ${OUTPUT} ; then
    echo "[${HOSTNAME}] Is replication configured on ${DB_HOST}:${DB_PORT} from '${_Master_Host}'?" >&2
    show_hint >&2
  fi
  exit 1

elif [ "${_Slave_SQL_Running,,}" != 'yes' ] ; then
  if ${OUTPUT} ; then
    echo "[${HOSTNAME}] Slave_SQL_Running is stopped on ${DB_HOST}:${DB_PORT} from '${_Master_Host}'!" >&2
    get_mysql_replication_errors
    show_hint >&2
  fi
  exit 1

elif [ "${_Slave_IO_Running,,}" != 'yes' ] ; then
  if ${OUTPUT} ; then
    echo "[${HOSTNAME}] Slave_IO_Running is stopped on ${DB_HOST}:${DB_PORT}!" >&2
    get_mysql_replication_errors
    show_hint >&2
  fi
  exit 1

elif ! [[ "${_Seconds_Behind_Master}" =~ ^[0-9]+$ ]] ; then
  # A non-numeric lag (e.g. NULL) would make the integer comparison below
  # fail; inside `elif` that failure counts as "false" and the script would
  # report success. The lag cannot be shown to be within the limit, so it is
  # reported as a lag problem.
  if ${OUTPUT} ; then
    echo "[${HOSTNAME}] Replication lag is unknown: Seconds_Behind_Master is '${_Seconds_Behind_Master}' on ${DB_HOST}:${DB_PORT} from '${_Master_Host}'!" >&2
    show_hint >&2
  fi
  exit 3

elif [ "${_Seconds_Behind_Master}" -gt "${MAX_MYSQL_LAG}" ] ; then
  if ${OUTPUT} ; then
    echo "[${HOSTNAME}] Replication lag is too big: ${_Seconds_Behind_Master} seconds on ${DB_HOST}:${DB_PORT} from '${_Master_Host}'!" >&2
    show_hint >&2
  fi
  exit 3

fi


if ${OUTPUT} ; then
  echo "[${HOSTNAME}] Replication slave is running on ${DB_HOST}:${DB_PORT} from '${_Master_Host}'. No replication errors."
fi
exit 0
