derstandard: auto-track seen articles
- `items` now shows only NEW (unseen) articles by default
- `items --all` shows everything, including already-seen articles
- `articles` command auto-marks fetched URLs as seen
- Added `seen`/`reset` commands for state management
- State is stored in `memory/derstandard-seen.txt`
This commit is contained in:
parent
1eac52a97c
commit
252fcb3ad0
4 changed files with 78 additions and 21 deletions
21
TOOLS.md
21
TOOLS.md
|
|
@ -45,20 +45,25 @@ When user arrives home, HA calls the webhook. Check `memory/arrival-reminders.js
|
||||||
Helper script: `~/bin/derstandard`
|
Helper script: `~/bin/derstandard`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
derstandard items [max] # Title + URL pairs (tab-separated)
|
derstandard items [max] # NEW items only (filters out seen)
|
||||||
derstandard article <url> # Full article content for a specific URL
|
derstandard items --all [max] # All items including already seen
|
||||||
derstandard articles <url1>,<url2>,... # Fetch multiple articles (comma-separated)
|
derstandard article <url> # Full article content for a specific URL
|
||||||
derstandard urls [max] # Article URLs only (default: 50)
|
derstandard articles <url1>,<url2>,... # Fetch multiple + auto-mark as seen
|
||||||
derstandard titles [max] # Article titles only
|
derstandard urls [max] # Article URLs only (default: 50)
|
||||||
derstandard raw [max] # Full RSS XML
|
derstandard titles [max] # Article titles only
|
||||||
|
derstandard seen # Show seen count and recent entries
|
||||||
|
derstandard reset # Clear seen history
|
||||||
|
derstandard raw [max] # Full RSS XML
|
||||||
```
|
```
|
||||||
|
|
||||||
- Uses internal fivefilters proxy (bypasses web_fetch private IP block)
|
- Uses internal fivefilters proxy (bypasses web_fetch private IP block)
|
||||||
- Pre-processes output for minimal token usage
|
- Pre-processes output for minimal token usage
|
||||||
|
- **Auto-tracks seen articles** in `memory/derstandard-seen.txt`
|
||||||
|
|
||||||
**Workflow for news briefing:**
|
**Workflow for news briefing:**
|
||||||
1. `derstandard items` → pick interesting titles
|
1. `derstandard items` → only shows NEW articles (unseen)
|
||||||
2. `derstandard articles <url1>,<url2>,...` → get full content for selected articles
|
2. `derstandard articles <url1>,<url2>,...` → fetch content + auto-mark as seen
|
||||||
|
3. Next briefing: step 1 automatically excludes previously covered articles
|
||||||
|
|
||||||
## Forgejo Git Access
|
## Forgejo Git Access
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,19 +4,27 @@ set -e
|
||||||
|
|
||||||
FIVEFILTERS_URL="https://fivefilters.cloonar.com"
|
FIVEFILTERS_URL="https://fivefilters.cloonar.com"
|
||||||
RSS_SOURCE="https://www.derstandard.at/rss"
|
RSS_SOURCE="https://www.derstandard.at/rss"
|
||||||
|
SEEN_FILE="${DERSTANDARD_SEEN_FILE:-$HOME/clawd/memory/derstandard-seen.txt}"
|
||||||
|
|
||||||
CURL="curl -sk"
|
CURL="curl -sk"
|
||||||
|
|
||||||
|
# Ensure seen file exists
|
||||||
|
mkdir -p "$(dirname "$SEEN_FILE")"
|
||||||
|
touch "$SEEN_FILE"
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
Usage: derstandard <command> [args]
|
Usage: derstandard <command> [args]
|
||||||
|
|
||||||
Commands:
|
Commands:
|
||||||
items [max] Title + URL pairs for selection (default: 50)
|
items [max] NEW items only (unseen), title + URL pairs
|
||||||
|
items --all [max] All items including seen
|
||||||
article <url> Fetch single article content
|
article <url> Fetch single article content
|
||||||
articles <url1,url2,...> Fetch multiple articles (comma-separated)
|
articles <url1,url2,...> Fetch multiple articles + mark as seen
|
||||||
urls [max] Article URLs only
|
urls [max] Article URLs only (all)
|
||||||
titles [max] Article titles only
|
titles [max] Article titles only (all)
|
||||||
|
seen Show seen count and recent entries
|
||||||
|
reset Clear seen history
|
||||||
raw [max] Full RSS XML
|
raw [max] Full RSS XML
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
@ -41,12 +49,41 @@ decode_entities() {
|
||||||
sed 's/&amp;amp;/\&/g; s/&amp;/\&/g; s/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&#39;/'"'"'/g'
|
sed 's/&amp;amp;/\&/g; s/&amp;/\&/g; s/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&#39;/'"'"'/g'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#######################################
# Record an article URL in the seen-history file, at most once.
# Globals:   SEEN_FILE (read, appended to)
# Arguments: $1 - article URL to record
# Returns:   0 on success; an empty URL is ignored and also returns 0
#######################################
mark_seen() {
  local url="${1:-}"

  # An empty pattern makes grep match every line, and an empty entry in the
  # history file would be meaningless, so skip it outright.
  [ -n "$url" ] || return 0

  # -F: treat the URL literally (it contains regex metacharacters like '?').
  # -x: require a whole-line match; without it a URL that is merely a
  #     substring of a stored one would be considered already recorded.
  # --: keep a URL starting with '-' from being parsed as an option.
  if ! grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null; then
    printf '%s\n' "$url" >> "$SEEN_FILE"
  fi
}
|
||||||
|
|
||||||
|
#######################################
# Test whether an article URL is already in the seen-history file.
# Globals:   SEEN_FILE (read)
# Arguments: $1 - article URL to look up
# Returns:   0 if the URL is stored as a complete line, non-zero otherwise
#            (including for an empty URL or an unreadable history file)
#######################################
is_seen() {
  local url="${1:-}"

  # grep treats an empty pattern as "match every line", which would report
  # an empty URL as seen whenever the history is non-empty.
  [ -n "$url" ] || return 1

  # -x: whole-line match so a partial URL does not count as seen;
  # -F: literal match; --: guard against URLs that start with '-'.
  grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null
}
|
||||||
|
|
||||||
case "${1:-}" in
|
case "${1:-}" in
|
||||||
items)
|
items)
|
||||||
feed=$(fetch_feed "${2:-50}")
|
show_all=false
|
||||||
titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
|
max=50
|
||||||
urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
|
shift || true
|
||||||
paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"${2:-50}"
|
if [ "${1:-}" = "--all" ]; then
|
||||||
|
show_all=true
|
||||||
|
shift || true
|
||||||
|
fi
|
||||||
|
[ -n "${1:-}" ] && max="$1"
|
||||||
|
|
||||||
|
feed=$(fetch_feed "$max")
|
||||||
|
|
||||||
|
# Build items list
|
||||||
|
while IFS=$'\t' read -r title url; do
|
||||||
|
if $show_all || ! is_seen "$url"; then
|
||||||
|
printf '%s\t%s\n' "$title" "$url"
|
||||||
|
fi
|
||||||
|
done < <(
|
||||||
|
titles=$(echo "$feed" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities)
|
||||||
|
urls=$(echo "$feed" | grep -oP '<link>\K[^<]+' | grep "derstandard.at/story")
|
||||||
|
paste <(echo "$titles") <(echo "$urls") 2>/dev/null | head -"$max"
|
||||||
|
)
|
||||||
;;
|
;;
|
||||||
article)
|
article)
|
||||||
[ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
|
[ -z "${2:-}" ] && { echo "Usage: derstandard article <url>"; exit 1; }
|
||||||
|
|
@ -62,6 +99,8 @@ case "${1:-}" in
|
||||||
fetch_single_article "$url"
|
fetch_single_article "$url"
|
||||||
echo ""
|
echo ""
|
||||||
echo ""
|
echo ""
|
||||||
|
# Mark as seen
|
||||||
|
mark_seen "$url"
|
||||||
done
|
done
|
||||||
;;
|
;;
|
||||||
urls)
|
urls)
|
||||||
|
|
@ -70,6 +109,18 @@ case "${1:-}" in
|
||||||
titles)
|
titles)
|
||||||
fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
|
fetch_feed "${2:-50}" | grep -oP '<title>\K[^<]+' | tail -n +2 | decode_entities
|
||||||
;;
|
;;
|
||||||
|
seen)
|
||||||
|
count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
|
||||||
|
echo "Seen: $count URLs"
|
||||||
|
if [ "$count" -gt 0 ]; then
|
||||||
|
echo "Recent:"
|
||||||
|
tail -5 "$SEEN_FILE"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
reset)
|
||||||
|
> "$SEEN_FILE"
|
||||||
|
echo "Cleared seen history"
|
||||||
|
;;
|
||||||
raw)
|
raw)
|
||||||
fetch_feed "${2:-50}"
|
fetch_feed "${2:-50}"
|
||||||
;;
|
;;
|
||||||
|
|
|
||||||
1
memory/derstandard-seen.txt
Normal file
1
memory/derstandard-seen.txt
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
https://www.derstandard.at/story/3000000307063/usa-schiessen-iranische-drohne-nahe-flugzeugtraeger-im-arabischen-meer-ab?ref=rss
|
||||||
|
|
@ -7,10 +7,10 @@
|
||||||
},
|
},
|
||||||
"lastChecks": {
|
"lastChecks": {
|
||||||
"news": "2026-01-30T08:17:00Z",
|
"news": "2026-01-30T08:17:00Z",
|
||||||
"rheinmetall": "2026-02-02T04:10:00Z",
|
"rheinmetall": "2026-02-03T22:00:00Z",
|
||||||
"rheinmetall_price": 1755.00,
|
"rheinmetall_price": 1773.50,
|
||||||
"calendar": "2026-02-02T04:10:00Z",
|
"calendar": "2026-02-03T22:00:00Z",
|
||||||
"steam_hardware": "2026-01-31T07:56:00Z",
|
"steam_hardware": "2026-02-03T22:00:00Z",
|
||||||
"notes": "RHM: €1,755.00, below threshold. Calendar: Gurkerl delivery 14:00 (no action needed). Steam hardware: 'early 2026' window, no official price or specific date yet."
|
"notes": "RHM: €1,773.50, below threshold. Calendar Feb 4: 'nyc' meeting at 12:00 (video call, no action). Steam hardware: still 'early 2026', no official price/date."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue