Changes: the `items` command now shows only NEW (unseen) articles by default; `items --all` shows everything including seen items; the `articles` command auto-marks fetched URLs as seen; added `seen`/`reset` commands for state management; state is stored in memory/derstandard-seen.txt.
130 lines
3.6 KiB
Bash
Executable file
130 lines
3.6 KiB
Bash
Executable file
#!/bin/bash
#
# Der Standard RSS helper - fetches via internal fivefilters.
# Run with no arguments for usage.
#
# Env:
#   DERSTANDARD_SEEN_FILE - optional override for the seen-URL state file.

# -e: abort on unhandled errors; -u: error on unset variables;
# -o pipefail: a failing pipeline stage fails the whole pipeline.
set -euo pipefail

# fivefilters instance used to turn the RSS feed into full-text items.
FIVEFILTERS_URL="https://fivefilters.cloonar.com"
RSS_SOURCE="https://www.derstandard.at/rss"
# One URL per line; records which articles have already been read.
SEEN_FILE="${DERSTANDARD_SEEN_FILE:-$HOME/clawd/memory/derstandard-seen.txt}"

# NOTE(review): -k disables TLS certificate verification — presumably needed
# for the internal fivefilters host; confirm before wider use.
# Deliberately unquoted at call sites so the options word-split.
CURL="curl -sk"

# Ensure seen file (and its directory) exists
mkdir -p "$(dirname "$SEEN_FILE")"
touch "$SEEN_FILE"
|
# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
Usage: derstandard <command> [args]

Commands:
  items [max]              NEW items only (unseen), title + URL pairs
  items --all [max]        All items including seen
  article <url>            Fetch single article content
  articles <url1,url2,...> Fetch multiple articles + mark as seen
  urls [max]               Article URLs only (all)
  titles [max]             Article titles only (all)
  seen                     Show seen count and recent entries
  reset                    Clear seen history
  raw [max]                Full RSS XML
EOF
}
|
|
|
|
# Fetch the full-text RSS feed via fivefilters.
# Arguments: $1 - max number of items (default 50)
# Outputs:   RSS XML on stdout
fetch_feed() {
  local max="${1:-50}"
  local encoded_url
  # Split declaration from assignment so the substitution's exit status
  # is not masked by 'local'. Percent-encode ':' and '/' so the feed URL
  # survives as a query-string parameter.
  encoded_url=$(printf '%s' "$RSS_SOURCE" | sed 's/:/%3A/g; s/\//%2F/g')
  $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=${max}&links=preserve&exc="
}
|
|
|
|
# Fetch one article through fivefilters and emit its plain-text body.
# Arguments: $1 - article URL
# Outputs:   article text on stdout (HTML entities decoded, tags stripped,
#            newlines flattened, runs of spaces squeezed to one)
fetch_single_article() {
  local url="$1"
  local encoded_url
  # Percent-encode the characters that would break the query string.
  encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g')
  $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" \
    | perl -0777 -ne 'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' \
    | sed 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&amp;/\&/g' \
    | sed 's/<[^>]*>//g' \
    | tr '\n' ' ' | sed 's/  */ /g'
}
|
|
|
|
# Decode the HTML entities that appear in feed titles (stdin -> stdout).
# &amp; is decoded LAST so that already-decoded text is not double-decoded.
decode_entities() {
  sed -e 's/&lt;/</g' \
      -e 's/&gt;/>/g' \
      -e 's/&quot;/"/g' \
      -e "s/&#039;/'/g" \
      -e 's/&amp;/\&/g'
}
|
|
|
|
# Record a URL as seen (append to SEEN_FILE), skipping duplicates.
# Arguments: $1 - article URL
mark_seen() {
  local url="$1"
  # Only add if not already present.
  # -x: whole-line match, so a URL that is a prefix of an already-seen URL
  #     is still appended; -F: literal; --: protect args starting with '-'.
  if ! grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null; then
    echo "$url" >> "$SEEN_FILE"
  fi
}
|
|
|
|
# Check whether a URL has already been recorded; exit 0 if seen.
# Arguments: $1 - article URL
is_seen() {
  # -x: exact whole-line match (mirrors mark_seen), so one URL being a
  # substring of another never produces a false positive.
  grep -qxF -- "$1" "$SEEN_FILE" 2>/dev/null
}
|
|
|
|
# Command dispatch: $1 selects the subcommand, remaining args are per-command.
case "${1:-}" in
  items)
    # Default: print only unseen items as "title<TAB>url" lines.
    show_all=false
    max=50
    shift || true
    if [ "${1:-}" = "--all" ]; then
      show_all=true
      shift || true
    fi
    [ -n "${1:-}" ] && max="$1"

    feed=$(fetch_feed "$max")

    # Build items list
    # Pair each title with its URL, filtering by seen-state unless --all.
    while IFS=$'\t' read -r title url; do
      if $show_all || ! is_seen "$url"; then
        printf '%s\t%s\n' "$title" "$url"
      fi
    done < <(
      # tail -n +2 drops the feed's own <title>; only story links are kept.
      # NOTE(review): paste pairs the two lists positionally — if an item
      # lacks a story link (or vice versa) the pairing drifts; confirm the
      # feed always yields matched title/link counts.
      titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
      urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
      paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"$max"
    )
    ;;
  article)
    # Fetch one article's text; does NOT mark it as seen.
    [ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
    fetch_single_article "$2"
    ;;
  articles)
    # Fetch several comma-separated articles and record each one as seen.
    [ -z "${2:-}" ] && { echo "Usage: derstandard articles <url1,url2,...>"; exit 1; }
    IFS=',' read -ra URLS <<< "$2"
    for url in "${URLS[@]}"; do
      # Extract title from URL slug
      # (text after the numeric story id, dashes -> spaces, first letter
      # upper-cased; \u is a GNU sed extension).
      title=$(echo "$url" | grep -oP '/\d+/\K[^?]+' | tr '-' ' ' | sed 's/.*/\u&/')
      echo "=== ${title} ==="
      fetch_single_article "$url"
      echo ""
      echo ""
      # Mark as seen
      mark_seen "$url"
    done
    ;;
  urls)
    # Story URLs only; seen-state is ignored.
    fetch_feed "${2:-50}" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story"
    ;;
  titles)
    # Item titles only; tail -n +2 skips the feed's own <title>.
    fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
    ;;
  seen)
    # Report how many URLs are recorded and show the five most recent.
    count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
    echo "Seen: $count URLs"
    if [ "$count" -gt 0 ]; then
      echo "Recent:"
      tail -5 "$SEEN_FILE"
    fi
    ;;
  reset)
    # Truncate the seen-state file.
    > "$SEEN_FILE"
    echo "Cleared seen history"
    ;;
  raw)
    # Raw fivefilters RSS XML, untouched.
    fetch_feed "${2:-50}"
    ;;
  *)
    usage
    ;;
esac
|