#!/bin/bash
# Der Standard RSS helper - fetches articles via an internal fivefilters
# (full-text RSS) instance and tracks which article URLs have been seen.
#
# Required env (from the sourced credentials file):
#   FIVEFILTERS_URL        base URL of the fivefilters instance
# Optional env:
#   CRED_FILE              path to credentials env file
#   DERSTANDARD_SEEN_FILE  path to the seen-URL history file
set -e

CRED_FILE="${CRED_FILE:-$(dirname "$0")/../.credentials/services.env}"
# shellcheck disable=SC1090 — path is runtime-configurable
source "$CRED_FILE"
# Fail fast if the credentials file did not provide the service endpoint.
: "${FIVEFILTERS_URL:?FIVEFILTERS_URL must be set (check $CRED_FILE)}"

RSS_SOURCE="https://www.derstandard.at/rss"
SEEN_FILE="${DERSTANDARD_SEEN_FILE:-$HOME/clawd/memory/derstandard-seen.txt}"
# Command as an array so the options survive quoting (-k: internal TLS, -s: silent).
CURL=(curl -sk)

# Ensure seen file exists
mkdir -p "$(dirname "$SEEN_FILE")"
touch "$SEEN_FILE"

usage() {
  cat <<EOF
Usage: $(basename "$0") <command> [args]

Commands:
  items [max]          NEW items only (unseen), title + URL pairs
  items --all [max]    All items including seen
  article <url>        Fetch single article content
  articles <urls>      Fetch multiple articles (comma-separated) + mark as seen
  urls [max]           Article URLs only (all)
  titles [max]         Article titles only (all)
  seen                 Show seen count and recent entries
  reset                Clear seen history
  raw [max]            Full RSS XML
EOF
}

# Fetch the full-text feed ($1 = max items, default 50) as RSS XML on stdout.
fetch_feed() {
  local max="${1:-50}"
  local encoded_url
  # Minimal percent-encoding: the feed URL only contains ':' and '/'.
  encoded_url=$(printf '%s' "$RSS_SOURCE" | sed 's/:/%3A/g; s/\//%2F/g')
  "${CURL[@]}" "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=${max}&links=preserve&exc="
}

# Fetch one article ($1 = article URL) and print its plain-text content:
# extract the first <item>'s <description>, decode HTML entities, strip
# tags, and collapse whitespace to a single line.
fetch_single_article() {
  local url="$1"
  local encoded_url
  encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g')
  "${CURL[@]}" "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" | \
    perl -0777 -ne 'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' | \
    sed 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&amp;/\&/g' | \
    sed 's/<[^>]*>//g' | \
    tr '\n' ' ' | sed 's/  */ /g'
}

# Decode common HTML entities on stdin (&amp; first would be wrong for
# double-encoded input, so it is decoded last... NOTE(review): original
# decoded &amp; first; kept that order to preserve behavior on the feed).
decode_entities() {
  sed 's/&amp;/\&/g; s/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&#39;/'"'"'/g'
}

# Append $1 to the seen file unless an identical line is already present.
# -x: whole-line match, so one URL being a substring of another does not
# produce a false "already seen".
mark_seen() {
  local url="$1"
  if ! grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null; then
    echo "$url" >> "$SEEN_FILE"
  fi
}

# Cap the seen file at its most recent 200 entries.
prune_seen() {
  local count
  count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
  if [ "$count" -gt 200 ]; then
    tail -n 200 "$SEEN_FILE" > "${SEEN_FILE}.tmp" && mv "${SEEN_FILE}.tmp" "$SEEN_FILE"
  fi
}

# Return 0 iff $1 is recorded as seen (whole-line match, see mark_seen).
is_seen() {
  grep -qxF -- "$1" "$SEEN_FILE" 2>/dev/null
}

case "${1:-}" in
  items)
    show_all=false
    max=50
    shift || true
    if [ "${1:-}" = "--all" ]; then
      show_all=true
      shift || true
    fi
    [ -n "${1:-}" ] && max="$1"
    feed=$(fetch_feed "$max")
    # Build items list, mark all as seen, only display unseen (unless --all).
    # Process substitution keeps the loop in the current shell so mark_seen
    # writes are not lost in a pipeline subshell.
    while IFS=$'\t' read -r title url; do
      if $show_all || ! is_seen "$url"; then
        printf '%s\t%s\n' "$title" "$url"
      fi
      # Mark as seen regardless of display
      mark_seen "$url"
    done < <(
      # tail -n +2 skips the channel-level <title>; only story URLs pair up.
      titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
      urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
      paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -n "$max"
    )
    # Prune to max 200 entries
    prune_seen
    ;;
  article)
    [ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
    fetch_single_article "$2"
    ;;
  articles)
    [ -z "${2:-}" ] && { echo "Usage: derstandard articles <url1,url2,...>"; exit 1; }
    IFS=',' read -ra URLS <<< "$2"
    for url in "${URLS[@]}"; do
      # Extract a display title from the URL slug (text after /<story-id>/,
      # dashes to spaces, first letter uppercased — GNU sed \u).
      # '|| true' keeps set -e from aborting when a URL has no slug.
      title=$(echo "$url" | grep -oP '/\d+/\K[^?]+' | tr '-' ' ' | sed 's/.*/\u&/') || true
      echo "=== ${title} ==="
      fetch_single_article "$url"
      echo ""
      echo ""
      # Mark as seen
      mark_seen "$url"
    done
    prune_seen
    ;;
  urls)
    fetch_feed "${2:-50}" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story"
    ;;
  titles)
    fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
    ;;
  seen)
    count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
    echo "Seen: $count URLs"
    if [ "$count" -gt 0 ]; then
      echo "Recent:"
      tail -n 5 "$SEEN_FILE"
    fi
    ;;
  reset)
    : > "$SEEN_FILE"
    echo "Cleared seen history"
    ;;
  raw)
    fetch_feed "${2:-50}"
    ;;
  *)
    usage
    ;;
esac