Add article fetching to derstandard helper
This commit is contained in:
parent
b7c5344a66
commit
b603036509
2 changed files with 39 additions and 15 deletions
13
TOOLS.md
13
TOOLS.md
|
|
@ -45,15 +45,20 @@ When user arrives home, HA calls the webhook. Check `memory/arrival-reminders.js
|
||||||
Helper script: `~/bin/derstandard`
|
Helper script: `~/bin/derstandard`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
derstandard urls [max] # Article URLs only (default: 50)
|
derstandard items [max] # Title + URL pairs (tab-separated)
|
||||||
derstandard titles [max] # Article titles only
|
derstandard article <url> # Full article content for a specific URL
|
||||||
derstandard items [max] # Title + URL pairs (tab-separated)
|
derstandard urls [max] # Article URLs only (default: 50)
|
||||||
derstandard raw [max] # Full RSS XML
|
derstandard titles [max] # Article titles only
|
||||||
|
derstandard raw [max] # Full RSS XML
|
||||||
```
|
```
|
||||||
|
|
||||||
- Uses internal fivefilters proxy (bypasses web_fetch private IP block)
|
- Uses internal fivefilters proxy (bypasses web_fetch private IP block)
|
||||||
- Pre-processes output for minimal token usage
|
- Pre-processes output for minimal token usage
|
||||||
|
|
||||||
|
**Workflow for news briefing:**
|
||||||
|
1. `derstandard items` → pick interesting titles
|
||||||
|
2. `derstandard article <url>` → get full content for selected articles
|
||||||
|
|
||||||
## Forgejo Git Access
|
## Forgejo Git Access
|
||||||
|
|
||||||
Helper script: `~/bin/forgejo`
|
Helper script: `~/bin/forgejo`
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ FIVEFILTERS_URL="https://fivefilters.cloonar.com"
|
||||||
FIVEFILTERS_IP="10.42.97.5"
|
FIVEFILTERS_IP="10.42.97.5"
|
||||||
RSS_SOURCE="https://www.derstandard.at/rss"
|
RSS_SOURCE="https://www.derstandard.at/rss"
|
||||||
|
|
||||||
# Resolve to internal IP (bypasses web_fetch private IP block)
|
|
||||||
CURL="curl -sk --resolve fivefilters.cloonar.com:443:${FIVEFILTERS_IP}"
|
CURL="curl -sk --resolve fivefilters.cloonar.com:443:${FIVEFILTERS_IP}"
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
|
|
@ -14,10 +13,11 @@ usage() {
|
||||||
Usage: derstandard <command> [args]
|
Usage: derstandard <command> [args]
|
||||||
|
|
||||||
Commands:
|
Commands:
|
||||||
urls [max] Extract article URLs (default: 50)
|
items [max] Title + URL pairs for selection (default: 50)
|
||||||
titles [max] Extract article titles
|
article <url> Fetch full article content for a specific URL
|
||||||
items [max] Title + URL pairs (tab-separated)
|
urls [max] Article URLs only
|
||||||
raw [max] Raw RSS XML
|
titles [max] Article titles only
|
||||||
|
raw [max] Full RSS XML
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -27,17 +27,36 @@ fetch_feed() {
|
||||||
$CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=${max}&links=preserve&exc="
|
$CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=${max}&links=preserve&exc="
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fetch_article() {
|
||||||
|
local url="$1"
|
||||||
|
local encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g')
|
||||||
|
$CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" | \
|
||||||
|
perl -0777 -ne 'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' | \
|
||||||
|
sed 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&amp;/\&/g' | \
|
||||||
|
sed 's/<[^>]*>//g' | \
|
||||||
|
tr '\n' ' ' | sed 's/  */ /g'
|
||||||
|
}
|
||||||
|
|
||||||
|
decode_entities() {
|
||||||
|
sed 's/&amp;amp;/\&/g; s/&amp;/\&/g; s/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&#39;/'"'"'/g'
|
||||||
|
}
|
||||||
|
|
||||||
case "${1:-}" in
|
case "${1:-}" in
|
||||||
|
items)
|
||||||
|
feed=$(fetch_feed "${2:-50}")
|
||||||
|
titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
|
||||||
|
urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
|
||||||
|
paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"${2:-50}"
|
||||||
|
;;
|
||||||
|
article)
|
||||||
|
[ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
|
||||||
|
fetch_article "$2"
|
||||||
|
;;
|
||||||
urls)
|
urls)
|
||||||
fetch_feed "${2:-50}" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story"
|
fetch_feed "${2:-50}" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story"
|
||||||
;;
|
;;
|
||||||
titles)
|
titles)
|
||||||
fetch_feed "${2:-50}" | grep -oP '<title><!\[CDATA\[\K[^\]]+' | tail -n +2
|
fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
|
||||||
;;
|
|
||||||
items)
|
|
||||||
feed=$(fetch_feed "${2:-50}")
|
|
||||||
paste <(echo "$feed" | grep -oP '<title><!\[CDATA\[\K[^\]]+' | tail -n +2) \
|
|
||||||
<(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story") 2>/dev/null | head -50
|
|
||||||
;;
|
;;
|
||||||
raw)
|
raw)
|
||||||
fetch_feed "${2:-50}"
|
fetch_feed "${2:-50}"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue