#!/usr/bin/env bash
set -euo pipefail

# Import the existing live homepage of a site into Microweber as a "Site
# Homepage" page so it's editable in /mw/admin/page from day one.
#
# Idempotent: if a page with url='site-homepage' already exists, its content is
# overwritten only if --force is passed or the stored content is empty;
# otherwise the script logs and exits 0.
#
# Usage:
#   bash import_homepage_into_mw.sh \
#     --site-root /srv/www/seethruit \
#     --homepage-url 'https://seethruit.net/' \
#     [--mw-at-root]    # also sets is_home=1
#     [--force]         # overwrite existing 'site-homepage' content
#     [--title 'Text']  # page title (default: 'Site Homepage')
#
#   bash import_homepage_into_mw.sh \
#     --site-root /srv/www/seethruit \
#     --from-file /srv/www/seethruit/public/index-classic.php \
#     [--mw-at-root] [--force]
#
# --from-file is the canonical-source mode: it reads a local PHP/HTML file
# (e.g. the original static index-classic.php) and extracts <main>...</main>
# the same way the URL fetch path does. This is the right mode after MW has
# already taken over /, since curl would otherwise return MW-themed markup.
#
# The script reads MySQL credentials from <site-root>/private/mw/.env so each
# install hits the right database without us re-passing them on the CLI.

# Option defaults; each is overwritten by the matching command-line flag.
SITE_ROOT=""
HOMEPAGE_URL=""
FROM_FILE=""
MW_AT_ROOT=0
FORCE=0
TITLE_OVERRIDE=""

#######################################
# Parse command-line flags into the option globals above.
# Globals:   SITE_ROOT, HOMEPAGE_URL, FROM_FILE, TITLE_OVERRIDE, MW_AT_ROOT,
#            FORCE (written)
# Arguments: the script's positional parameters ("$@")
# Outputs:   an error on stderr for an unknown flag, or for a value-taking
#            flag that is the last argument
# Returns:   0 on success; exits 1 on a usage error
#######################################
parse_args() {
  while (($#)); do
    case "$1" in
      --site-root | --homepage-url | --from-file | --title)
        # Without this check `shift 2` fails on a trailing flag and `set -e`
        # terminates the script without printing anything.
        if (($# < 2)); then
          echo "Error: $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --site-root) SITE_ROOT="$2" ;;
          --homepage-url) HOMEPAGE_URL="$2" ;;
          --from-file) FROM_FILE="$2" ;;
          --title) TITLE_OVERRIDE="$2" ;;
        esac
        shift 2
        ;;
      --mw-at-root)
        MW_AT_ROOT=1
        shift
        ;;
      --force)
        FORCE=1
        shift
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Print a message on stderr and stop with exit status 1.
fail() {
  echo "$1" >&2
  exit 1
}

# --site-root must be given and must contain a private/mw/ directory.
[[ -n "$SITE_ROOT" && -d "$SITE_ROOT/private/mw" ]] \
  || fail "Error: --site-root must point to a site with private/mw/ deployed"

# Exactly one HTML source has to be selected: "u" marks a URL, "f" a file.
case "${HOMEPAGE_URL:+u}${FROM_FILE:+f}" in
  "") fail "Error: one of --homepage-url or --from-file is required" ;;
  uf) fail "Error: --homepage-url and --from-file are mutually exclusive" ;;
esac

# A file source has to exist as a regular file.
if [[ -n "$FROM_FILE" ]]; then
  [[ -f "$FROM_FILE" ]] \
    || fail "Error: --from-file path '$FROM_FILE' is not a regular file"
fi

# The install's .env supplies the database credentials read further down.
ENV_FILE="$SITE_ROOT/private/mw/.env"
if [[ ! -f "$ENV_FILE" ]]; then
  fail "Error: $ENV_FILE missing"
fi

#######################################
# Print the value of one KEY=value entry from the site's .env file.
# Globals:   ENV_FILE (read)
# Arguments: $1 - exact key name
# Outputs:   the first matching value on stdout, minus one leading and one
#            trailing double quote; nothing when the key is absent
# Returns:   0 whether or not the key exists, so that under
#            `set -e -o pipefail` a missing optional key yields an empty
#            string instead of aborting the caller before its defaults apply;
#            non-zero only if ENV_FILE cannot be opened
#######################################
read_env() {
  local key="$1" line value
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ "$line" == "$key="* ]] || continue
    value="${line#*=}"
    # Tolerate CRLF-terminated files: a stray \r would end up in the value.
    value="${value%$'\r'}"
    value="${value#\"}"
    value="${value%\"}"
    printf '%s\n' "$value"
    return 0
  done < "$ENV_FILE"
  return 0
}

# Connection settings come from the .env file. Host and port fall back to
# 127.0.0.1:3306 when the file yields an empty value for them.
DB_HOST="$(read_env DB_HOST)"
DB_HOST="${DB_HOST:-127.0.0.1}"
DB_PORT="$(read_env DB_PORT)"
DB_PORT="${DB_PORT:-3306}"
DB_NAME="$(read_env DB_DATABASE)"
DB_USER="$(read_env DB_USERNAME)"
DB_PASS="$(read_env DB_PASSWORD)"

# Database name and user have no fallback; stop if either is empty.
[[ -n "$DB_NAME" && -n "$DB_USER" ]] || {
  echo "Error: DB_DATABASE/DB_USERNAME missing from $ENV_FILE" >&2
  exit 1
}

# Scratch files: raw source HTML, extracted body, generated SQL.
# All three names are defined (empty) before the trap is installed so the
# cleanup command never expands an unset variable under `set -u` when a later
# mktemp fails; rm -f ignores the empty operands.
TMP_HTML=""
TMP_BODY=""
TMP_SQL=""
trap 'rm -f -- "$TMP_HTML" "$TMP_BODY" "$TMP_SQL"' EXIT
TMP_HTML="$(mktemp)"
TMP_BODY="$(mktemp)"
TMP_SQL="$(mktemp)"

# Fill TMP_HTML from whichever source was selected on the command line.
case "$HOMEPAGE_URL" in
  "")
    echo "Reading $FROM_FILE ..."
    cp -f "$FROM_FILE" "$TMP_HTML"
    ;;
  *)
    echo "Fetching $HOMEPAGE_URL ..."
    curl -fsSL --max-time 30 -o "$TMP_HTML" "$HOMEPAGE_URL" || {
      echo "Error: curl failed for $HOMEPAGE_URL" >&2
      exit 1
    }
    ;;
esac

# A source shorter than 200 bytes is rejected as suspicious.
BYTES="$(wc -c < "$TMP_HTML")"
BYTES="${BYTES// /}" # some wc implementations pad the count with spaces
((BYTES >= 200)) || {
  echo "Error: source HTML is suspiciously small ($BYTES bytes)" >&2
  exit 1
}

# Extract the <main>...</main> body if present, else fall back to everything
# between <body>...</body>, else the whole document. Then strip <script>,
# <noscript>, <header>, <footer> and <nav> elements plus HTML comments (the
# live page may have Drayton/See-Thru runtime hooks the editor does not need).
#
# preg_match() returns false and preg_replace() returns null when PCRE gives
# up (for example on its backtrack limit with a large page). Those cases, and
# unreadable/unwritable files, abort with exit status 1 so a regex failure can
# neither silently widen the extraction to the whole document nor blank the
# body. The PHP source must not contain single quotes: it is a single-quoted
# shell argument.
php -r '
function fail($msg) {
    fwrite(STDERR, "Error: " . $msg . "\n");
    exit(1);
}

$h = @file_get_contents($argv[1]);
if ($h === false) {
    fail("cannot read source HTML file");
}

// Prefer <main>; only look for <body> when <main> is definitely absent.
$found = preg_match("#<main\b[^>]*>(.*?)</main>#is", $h, $m);
if ($found === 0) {
    $found = preg_match("#<body\b[^>]*>(.*?)</body>#is", $h, $m);
}
if ($found === false) {
    fail("PCRE error " . preg_last_error() . " while locating main/body");
}
$out = ($found === 1) ? $m[1] : $h;

// Applied in this order; each pattern is non-greedy and case-insensitive
// (the comment pattern only needs dot-matches-newline).
$strip = array(
    "#<script\b[^>]*>.*?</script>#is",
    "#<noscript\b[^>]*>.*?</noscript>#is",
    "#<header\b[^>]*>.*?</header>#is",
    "#<footer\b[^>]*>.*?</footer>#is",
    "#<nav\b[^>]*>.*?</nav>#is",
    "#<!--.*?-->#s",
);
foreach ($strip as $re) {
    $out = preg_replace($re, "", $out);
    if ($out === null) {
        fail("PCRE error " . preg_last_error() . " while stripping markup");
    }
}

$out = trim($out);
if ($out === "") { fwrite(STDERR, "warning: extracted body is empty\n"); }
if (file_put_contents($argv[2], $out) === false) {
    fail("cannot write extracted body");
}
' "$TMP_HTML" "$TMP_BODY"

# Report the extracted size and refuse bodies shorter than 50 bytes.
BODY_BYTES="$(wc -c < "$TMP_BODY")"
BODY_BYTES="${BODY_BYTES// /}" # drop any space padding from wc
echo "Extracted body: $BODY_BYTES bytes"
((BODY_BYTES >= 50)) || {
  echo "Error: extracted body is too small to be useful" >&2
  exit 1
}

# HOST_LABEL: the third "/"-separated field of the URL (its host part) in URL
# mode, otherwise the last path component of the site root.
# TITLE: the --title value when one was given, else the fixed default.
if [[ -z "$HOMEPAGE_URL" ]]; then
  HOST_LABEL="$(basename "$SITE_ROOT")"
else
  HOST_LABEL="$(awk -F/ '{print $3}' <<< "$HOMEPAGE_URL")"
fi
TITLE="Site Homepage"
if [[ -n "$TITLE_OVERRIDE" ]]; then
  TITLE="$TITLE_OVERRIDE"
fi

# Build the UPSERT script. Every free-form value (body, title, host label) is
# handed over as base64 and decoded by MySQL, so neither HTML nor operator
# text is spliced into a quoted SQL literal (quote doubling alone does not
# protect against a backslash in the value).

# Print stdin as one unwrapped base64 line. Newlines are removed with tr
# because the -w0 flag exists only in GNU base64.
b64_oneline() {
  base64 | tr -d '\n'
}

BODY_B64="$(b64_oneline < "$TMP_BODY")"
TITLE_B64="$(printf '%s' "$TITLE" | b64_oneline)"
HOST_B64="$(printf '%s' "$HOST_LABEL" | b64_oneline)"
IS_HOME=0
if [[ "$MW_AT_ROOT" == "1" ]]; then IS_HOME=1; fi

cat > "$TMP_SQL" <<SQL
SET @body  = FROM_BASE64('$BODY_B64');
SET @title = FROM_BASE64('$TITLE_B64');
SET @url   = 'site-homepage';
SET @host  = FROM_BASE64('$HOST_B64');
SET @ishome = $IS_HOME;
SET @force = $FORCE;
SET @existing := (SELECT id FROM content WHERE url = @url AND is_deleted = 0 LIMIT 1);

-- Decide before touching the table whether the existing row gets rewritten,
-- so the final report states what actually happened (the empty-content case
-- updates the row even without --force).
SET @do_update := (SELECT COUNT(*) FROM content
                   WHERE id = @existing
                     AND (@force = 1 OR content IS NULL OR content = ''));

-- Insert if missing. FROM DUAL keeps the WHERE clause valid on servers that
-- reject a table-less SELECT ... WHERE.
INSERT INTO content
  (content_type, subtype, url, title, parent, content, is_active, is_home,
   is_deleted, layout_file, status, created_at, updated_at, posted_at)
SELECT 'page','static', @url, @title, 0, @body, 1, @ishome,
       0, 'clean.php', 'published', NOW(), NOW(), NOW()
FROM DUAL
WHERE @existing IS NULL;

-- Update if existing AND --force, OR if existing but body is empty
UPDATE content
SET content = @body,
    title   = @title,
    is_active = 1,
    is_home = GREATEST(is_home, @ishome),
    updated_at = NOW()
WHERE id = @existing
  AND @do_update = 1;

SELECT IF(@existing IS NULL, 'INSERTED', IF(@do_update = 1,'UPDATED','SKIPPED-EXISTS')) AS result,
       COALESCE(@existing, LAST_INSERT_ID()) AS content_id;
SQL

# Shared client options. The password travels in the MYSQL_PWD environment
# variable rather than as -p"$DB_PASS": argv is readable by other local users
# through ps, and with an empty password the argument collapses to a bare -p,
# which makes the client stop and prompt on the terminal.
MYSQL_ARGS=(--protocol=TCP -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER")

MYSQL_PWD="$DB_PASS" mysql "${MYSQL_ARGS[@]}" "$DB_NAME" --table < "$TMP_SQL"

# If we set is_home=1 (See Thru IT), make sure no OTHER content row claims to be
# home -- otherwise MW renders an unrelated page at /. Other rows are demoted
# only once the site-homepage row really carries is_home=1: when the import was
# skipped (row exists, no --force) that row can still be is_home=0, and
# clearing the rest would leave the site without any home page.
if [[ "$MW_AT_ROOT" == "1" ]]; then
  MYSQL_PWD="$DB_PASS" mysql "${MYSQL_ARGS[@]}" "$DB_NAME" -e \
    "SET @home_id := (SELECT id FROM content WHERE url = 'site-homepage' AND is_deleted = 0 AND is_home = 1 LIMIT 1);
     UPDATE content SET is_home=0 WHERE @home_id IS NOT NULL AND is_home=1 AND url <> 'site-homepage';"
fi

echo "Imported homepage content into MW at $ENV_FILE -> /mw/admin/page (url='site-homepage')."
