From b603036509421cbf09e0c55a5d4b29e26b2c2d63 Mon Sep 17 00:00:00 2001 From: Agent Date: Tue, 3 Feb 2026 21:44:57 +0000 Subject: [PATCH] Add article fetching to derstandard helper --- TOOLS.md | 13 +++++++++---- bin/derstandard | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/TOOLS.md b/TOOLS.md index f9d28ca..20af716 100644 --- a/TOOLS.md +++ b/TOOLS.md @@ -45,15 +45,20 @@ When user arrives home, HA calls the webhook. Check `memory/arrival-reminders.js Helper script: `~/bin/derstandard` ```bash -derstandard urls [max] # Article URLs only (default: 50) -derstandard titles [max] # Article titles only -derstandard items [max] # Title + URL pairs (tab-separated) -derstandard raw [max] # Full RSS XML +derstandard items [max] # Title + URL pairs (tab-separated) +derstandard article <url> # Full article content for a specific URL +derstandard urls [max] # Article URLs only (default: 50) +derstandard titles [max] # Article titles only +derstandard raw [max] # Full RSS XML ``` - Uses internal fivefilters proxy (bypasses web_fetch private IP block) - Pre-processes output for minimal token usage +**Workflow for news briefing:** +1. `derstandard items` → pick interesting titles +2. 
`derstandard article ` → get full content for selected articles + ## Forgejo Git Access Helper script: `~/bin/forgejo` diff --git a/bin/derstandard b/bin/derstandard index 03fb0a6..0731d2f 100755 --- a/bin/derstandard +++ b/bin/derstandard @@ -6,7 +6,6 @@ FIVEFILTERS_URL="https://fivefilters.cloonar.com" FIVEFILTERS_IP="10.42.97.5" RSS_SOURCE="https://www.derstandard.at/rss" -# Resolve to internal IP (bypasses web_fetch private IP block) CURL="curl -sk --resolve fivefilters.cloonar.com:443:${FIVEFILTERS_IP}" usage() { @@ -14,10 +13,11 @@ usage() { Usage: derstandard [args] Commands: - urls [max] Extract article URLs (default: 50) - titles [max] Extract article titles - items [max] Title + URL pairs (tab-separated) - raw [max] Raw RSS XML + items [max] Title + URL pairs for selection (default: 50) + article Fetch full article content for a specific URL + urls [max] Article URLs only + titles [max] Article titles only + raw [max] Full RSS XML EOF } @@ -27,17 +27,36 @@ fetch_feed() { $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=${max}&links=preserve&exc=" } +fetch_article() { + local url="$1" + local encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g') + $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" | \ + perl -0777 -ne 'print $1 if /.*?(.*?)<\/description>.*?<\/item>/s' | \ + sed 's/<//g; s/"/"/g; s/&/\&/g' | \ + sed 's/<[^>]*>//g' | \ + tr '\n' ' ' | sed 's/ */ /g' +} + +decode_entities() { + sed 's/&amp;/\&/g; s/&/\&/g; s/<//g; s/"/"/g; s/'/'"'"'/g' +} + case "${1:-}" in + items) + feed=$(fetch_feed "${2:-50}") + titles=$(echo "$feed" | grep -oP '\K[^<]+' | tail -n +2 | decode_entities) + urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story") + paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"${2:-50}" + ;; + article) + [ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; } + fetch_article "$2" + ;; 
urls) fetch_feed "${2:-50}" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story" ;; titles) - fetch_feed "${2:-50}" | grep -oP '<title><!\[CDATA\[\K[^\]]+' | tail -n +2 - ;; - items) - feed=$(fetch_feed "${2:-50}") - paste <(echo "$feed" | grep -oP '<title><!\[CDATA\[\K[^\]]+' | tail -n +2) \ - <(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story") 2>/dev/null | head -50 + fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities ;; raw) fetch_feed "${2:-50}"