derstandard: auto-track seen articles
- items now shows only NEW (unseen) articles by default
- items --all shows everything including seen
- articles command auto-marks URLs as seen
- Added seen/reset commands for state management
- State stored in memory/derstandard-seen.txt
This commit is contained in:
parent
1eac52a97c
commit
252fcb3ad0
4 changed files with 78 additions and 21 deletions
|
|
@ -4,19 +4,27 @@ set -e
|
|||
|
||||
# Base URL of the FiveFilters instance.
# NOTE(review): presumably used for full-text article extraction; the
# consumer is not visible in this hunk - confirm.
FIVEFILTERS_URL="https://fivefilters.cloonar.com"
|
||||
# RSS feed URL for derstandard.at.
RSS_SOURCE="https://www.derstandard.at/rss"
|
||||
# File holding one already-seen article URL per line. Override the default
# location by exporting DERSTANDARD_SEEN_FILE.
SEEN_FILE="${DERSTANDARD_SEEN_FILE:-$HOME/clawd/memory/derstandard-seen.txt}"
|
||||
|
||||
# curl invocation prefix: -s silences progress output, -k disables TLS
# certificate verification.
# NOTE(review): -k accepts any certificate; confirm this is intentional.
CURL="curl -sk"
|
||||
|
||||
# Create the seen file (and its parent directory) at start-up so later
# grep/wc/tail calls on it always find a file.
|
||||
mkdir -p "$(dirname "$SEEN_FILE")"
|
||||
touch "$SEEN_FILE"
|
||||
|
||||
# Print the command overview for the derstandard CLI to stdout.
# The here-document delimiter is unquoted, so the shell would expand any
# $variables or $(commands) in the text; the text currently contains none.
# NOTE(review): this view shows "items", "articles", "urls" and "titles"
# twice (old and new wording of the help text) - confirm which lines are
# present in the actual script.
usage() {
|
||||
cat <<EOF
|
||||
Usage: derstandard <command> [args]
|
||||
|
||||
Commands:
|
||||
items [max] Title + URL pairs for selection (default: 50)
|
||||
items [max] NEW items only (unseen), title + URL pairs
|
||||
items --all [max] All items including seen
|
||||
article <url> Fetch single article content
|
||||
articles <url1,url2,...> Fetch multiple articles (comma-separated)
|
||||
urls [max] Article URLs only
|
||||
titles [max] Article titles only
|
||||
articles <url1,url2,...> Fetch multiple articles + mark as seen
|
||||
urls [max] Article URLs only (all)
|
||||
titles [max] Article titles only (all)
|
||||
seen Show seen count and recent entries
|
||||
reset Clear seen history
|
||||
raw [max] Full RSS XML
|
||||
EOF
|
||||
}
|
||||
|
|
@ -41,12 +49,41 @@ decode_entities() {
|
|||
sed 's/&amp;/\&/g; s/&/\&/g; s/</</g; s/>/>/g; s/"/"/g; s/'/'"'"'/g'
|
||||
}
|
||||
|
||||
#######################################
# Record a URL in the seen file unless that exact URL is already listed.
# Globals:   SEEN_FILE (read; appended to)
# Arguments: $1 - article URL to record; an empty value is ignored
# Returns:   0 on success, non-zero if the append fails
#######################################
mark_seen() {
  local url="${1:-}"

  # Nothing to record; also avoids writing blank lines into the file.
  [ -n "$url" ] || return 0

  # -x matches whole lines only: with plain -F a URL that is merely a
  # substring of an already-recorded URL was wrongly treated as present
  # and never stored. "--" protects against a value starting with "-".
  # stderr is silenced on purpose: if the file is missing, grep fails,
  # the URL counts as "not present", and the append below creates it.
  if ! grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null; then
    printf '%s\n' "$url" >> "$SEEN_FILE"
  fi
}
|
||||
|
||||
#######################################
# Test whether a URL has already been recorded in the seen file.
# Globals:   SEEN_FILE (read)
# Arguments: $1 - article URL to look up
# Returns:   0 if the exact URL is listed; non-zero otherwise (including
#            an empty argument or an unreadable/missing seen file)
#######################################
is_seen() {
  # An empty pattern would match every line under plain -F, making every
  # URL-less row look "seen"; treat it as not seen instead.
  [ -n "${1:-}" ] || return 1

  # -x: whole-line match, so a URL that is only a substring of a recorded
  # URL is not reported as seen. "--" guards values starting with "-".
  # stderr is silenced on purpose: a missing file simply means "not seen".
  grep -qxF -- "$1" "$SEEN_FILE" 2>/dev/null
}
|
||||
|
||||
case "${1:-}" in
|
||||
items)
|
||||
feed=$(fetch_feed "${2:-50}")
|
||||
titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
|
||||
urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
|
||||
paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"${2:-50}"
|
||||
show_all=false
|
||||
max=50
|
||||
shift || true
|
||||
if [ "${1:-}" = "--all" ]; then
|
||||
show_all=true
|
||||
shift || true
|
||||
fi
|
||||
[ -n "${1:-}" ] && max="$1"
|
||||
|
||||
feed=$(fetch_feed "$max")
|
||||
|
||||
# Build items list
|
||||
while IFS=$'\t' read -r title url; do
|
||||
if $show_all || ! is_seen "$url"; then
|
||||
printf '%s\t%s\n' "$title" "$url"
|
||||
fi
|
||||
done < <(
|
||||
titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
|
||||
urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
|
||||
paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"$max"
|
||||
)
|
||||
;;
|
||||
article)
|
||||
[ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
|
||||
|
|
@ -62,6 +99,8 @@ case "${1:-}" in
|
|||
fetch_single_article "$url"
|
||||
echo ""
|
||||
echo ""
|
||||
# Mark as seen
|
||||
mark_seen "$url"
|
||||
done
|
||||
;;
|
||||
urls)
|
||||
|
|
@ -70,6 +109,18 @@ case "${1:-}" in
|
|||
titles)
|
||||
fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
|
||||
;;
|
||||
seen)
|
||||
count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
|
||||
echo "Seen: $count URLs"
|
||||
if [ "$count" -gt 0 ]; then
|
||||
echo "Recent:"
|
||||
tail -5 "$SEEN_FILE"
|
||||
fi
|
||||
;;
|
||||
reset)
|
||||
> "$SEEN_FILE"
|
||||
echo "Cleared seen history"
|
||||
;;
|
||||
raw)
|
||||
fetch_feed "${2:-50}"
|
||||
;;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue