#!/bin/bash
# Der Standard RSS helper - fetches via internal fivefilters
#
# Required env (provided by the sourced credentials file):
#   FIVEFILTERS_URL       - base URL of the fivefilters instance
# Optional env:
#   CRED_FILE             - path to the credentials env file
#   DERSTANDARD_SEEN_FILE - path to the seen-URL history file
set -e

CRED_FILE="${CRED_FILE:-$(dirname "$0")/../.credentials/services.env}"
# Fail early with a clear message instead of the terse error 'source'
# produces for a missing file under set -e.
if [ ! -f "$CRED_FILE" ]; then
  echo "error: credentials file not found: $CRED_FILE" >&2
  exit 1
fi
source "$CRED_FILE"

RSS_SOURCE="https://www.derstandard.at/rss"
SEEN_FILE="${DERSTANDARD_SEEN_FILE:-$HOME/clawd/memory/derstandard-seen.txt}"

# NOTE(review): -k disables TLS certificate verification - presumably the
# internal fivefilters endpoint uses a self-signed cert; confirm intended.
# Kept as a plain string because callers expand $CURL unquoted (word-split).
CURL="curl -sk"

# Ensure seen file exists
mkdir -p "$(dirname "$SEEN_FILE")"
touch "$SEEN_FILE"

# Print the command summary for this script to stdout.
usage() {
  # Quoted heredoc delimiter: the help text contains no expansions and
  # must never be subject to any.
  cat <<'USAGE'
Usage: derstandard <command> [args]

Commands:
  items [max]              NEW items only (unseen), title + URL pairs
  items --all [max]        All items including seen
  article <url>            Fetch single article content
  articles <url1,url2,...> Fetch multiple articles + mark as seen
  urls [max]               Article URLs only (all)
  titles [max]             Article titles only (all)
  seen                     Show seen count and recent entries
  reset                    Clear seen history
  raw [max]                Full RSS XML
USAGE
}

# Fetch the Der Standard RSS feed through the fivefilters proxy.
# Globals:   CURL, RSS_SOURCE, FIVEFILTERS_URL
# Arguments: $1 - max number of items (default 50)
# Outputs:   full-text RSS XML on stdout
fetch_feed() {
  local max="${1:-50}"
  local encoded_url
  # Declaration split from assignment: 'local x=$(cmd)' always returns 0
  # and would mask a sed failure under set -e.
  # Percent-encode ':' and '/' so the feed URL survives as a query param.
  encoded_url=$(printf '%s' "$RSS_SOURCE" | sed 's/:/%3A/g; s/\//%2F/g')
  # $CURL is intentionally unquoted so it word-splits into 'curl -sk'.
  $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=${max}&links=preserve&exc="
}

# Fetch one article via fivefilters and reduce it to plain text.
# Globals:   CURL, FIVEFILTERS_URL
# Arguments: $1 - article URL
# Outputs:   article body as a single line of plain text on stdout
fetch_single_article() {
  local url="$1"
  local encoded_url
  # Declaration split from assignment so a sed failure is not masked by
  # 'local' returning 0. Encode the characters that break a query string.
  encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g')
  # Pipeline: fetch single-item feed XML -> pull the first <item>'s
  # <description> -> decode HTML entities (&amp; last, so earlier decodes
  # are not re-decoded) -> strip tags -> collapse whitespace to one line.
  $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" | \
    perl -0777 -ne 'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' | \
    sed 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&amp;/\&/g' | \
    sed 's/<[^>]*>//g' | \
    tr '\n' ' ' | sed 's/  */ /g'
}

# stdin -> stdout filter: turn the feed's HTML entities into literal
# characters. Order matters: '&amp;amp;' and '&amp;' are handled first,
# so doubly-encoded entities (e.g. '&amp;lt;') are fully unescaped too.
decode_entities() {
  sed -e 's/&amp;amp;/\&/g' \
      -e 's/&amp;/\&/g' \
      -e 's/&lt;/</g' \
      -e 's/&gt;/>/g' \
      -e 's/&quot;/"/g' \
      -e "s/&#39;/'/g"
}

# Record a URL in the seen-history file (idempotent).
# Globals:   SEEN_FILE
# Arguments: $1 - article URL
mark_seen() {
  local url="$1"
  # -x forces whole-line matching: plain -F substring matching would treat
  # a URL that is a prefix of an already-seen URL as a duplicate and never
  # record it. -F matches literally; -- guards URLs beginning with '-'.
  if ! grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null; then
    echo "$url" >> "$SEEN_FILE"
  fi
}

# Cap the seen-history file at its most recent 200 entries.
# Globals: SEEN_FILE
prune_seen() {
  local count
  # Declaration split from assignment so the pipeline's status isn't
  # masked by 'local' returning 0.
  count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
  if [ "$count" -gt 200 ]; then
    # Rewrite via a temp file so a failed tail never truncates history.
    tail -n 200 "$SEEN_FILE" > "${SEEN_FILE}.tmp" && mv "${SEEN_FILE}.tmp" "$SEEN_FILE"
  fi
}

# Return 0 if the URL is already recorded in the seen-history file.
# Globals:   SEEN_FILE
# Arguments: $1 - article URL
is_seen() {
  # -x: whole-line match, so a URL that is merely a prefix/substring of a
  # seen URL is not a false hit; -F: literal match; --: guard against
  # URLs beginning with '-'.
  grep -qxF -- "$1" "$SEEN_FILE" 2>/dev/null
}

# --- Command dispatch ------------------------------------------------------
case "${1:-}" in
  items)
    # items [--all] [max]: list title<TAB>url pairs from the feed and
    # record every listed URL as seen (even ones not displayed).
    show_all=false
    max=50
    shift || true
    if [ "${1:-}" = "--all" ]; then
      show_all=true
      shift || true
    fi
    [ -n "${1:-}" ] && max="$1"
    
    feed=$(fetch_feed "$max")
    
    # Build items list, mark all as seen, only display unseen (unless --all)
    # (process substitution keeps the loop in the current shell so
    # mark_seen's appends are not lost to a pipeline subshell)
    while IFS=$'\t' read -r title url; do
      if $show_all || ! is_seen "$url"; then
        printf '%s\t%s\n' "$title" "$url"
      fi
      # Mark as seen regardless of display
      mark_seen "$url"
    done < <(
      # Titles: every <title> except the first (the channel title).
      # URLs: only <link> values pointing at actual story pages.
      # NOTE(review): paste pairs the two lists positionally - if the feed
      # ever yields different counts of titles and story links the pairing
      # silently misaligns; verify against real fivefilters output.
      titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
      urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
      paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"$max"
    )
    
    # Prune to max 200 entries
    prune_seen
    ;;
  article)
    # article <url>: print one article's plain text; does NOT mark it seen.
    [ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
    fetch_single_article "$2"
    ;;
  articles)
    # articles <url1,url2,...>: fetch several articles, each marked seen.
    [ -z "${2:-}" ] && { echo "Usage: derstandard articles <url1,url2,...>"; exit 1; }
    IFS=',' read -ra URLS <<< "$2"
    for url in "${URLS[@]}"; do
      # Extract title from URL slug
      # (text after the numeric story id; dashes become spaces; first
      # letter upper-cased via GNU sed's \u - GNU-only extension)
      title=$(echo "$url" | grep -oP '/\d+/\K[^?]+' | tr '-' ' ' | sed 's/.*/\u&/')
      echo "=== ${title} ==="
      fetch_single_article "$url"
      echo ""
      echo ""
      # Mark as seen
      mark_seen "$url"
    done
    prune_seen
    ;;
  urls)
    # urls [max]: story URLs only; does not touch the seen history.
    fetch_feed "${2:-50}" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story"
    ;;
  titles)
    # titles [max]: item titles only (channel title skipped via tail -n +2).
    fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
    ;;
  seen)
    # seen: print the size of the seen history plus its 5 newest entries.
    count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
    echo "Seen: $count URLs"
    if [ "$count" -gt 0 ]; then
      echo "Recent:"
      tail -5 "$SEEN_FILE"
    fi
    ;;
  reset)
    # reset: truncate the seen-history file.
    > "$SEEN_FILE"
    echo "Cleared seen history"
    ;;
  raw)
    # raw [max]: pass the fivefilters feed XML through untouched.
    fetch_feed "${2:-50}"
    ;;
  *)
    # Unknown or missing command: show help.
    usage
    ;;
esac
