diff --git a/TOOLS.md b/TOOLS.md index 32a8ef9..dcd3ed9 100644 --- a/TOOLS.md +++ b/TOOLS.md @@ -103,6 +103,30 @@ curl -s -X REPORT -u "$NEXTCLOUD_USER:$NEXTCLOUD_PASS" \ "$NEXTCLOUD_URL/remote.php/dav/calendars/$NEXTCLOUD_USER/$CALDAV_CALENDAR/" ``` +## AI News RSS + +Helper script: `~/bin/ainews` + +```bash +ainews items [max] # NEW items only (filters out seen) +ainews items --all [max] # All items including already seen +ainews article <url> # Full article content via fivefilters +ainews articles <url1>,<url2>,... # Fetch multiple + auto-mark as seen +ainews seen # Show seen count and recent entries +ainews reset # Clear seen history +``` + +- Aggregates: Simon Willison, OpenAI Blog, Sebastian Raschka +- Auto-tracks seen articles in `memory/ainews-seen.txt` +- Auto-prunes to 200 entries + +**Workflow for AI news briefing:** +1. `ainews items` → shows NEW articles, marks them as seen +2. Pick interesting ones, optionally fetch full content with `articles` +3. Next briefing: only shows articles published since last check + +--- + ## Der Standard RSS Summaries - **Schedule:** 4× daily: 10:00, 14:00, 18:00, 22:00 (Vienna time) diff --git a/bin/ainews b/bin/ainews new file mode 100755 index 0000000..abd449b --- /dev/null +++ b/bin/ainews @@ -0,0 +1,147 @@ +#!/bin/bash +# AI News RSS helper - aggregates multiple AI-focused feeds +set -e + +FIVEFILTERS_URL="https://fivefilters.cloonar.com" +SEEN_FILE="${AINEWS_SEEN_FILE:-$HOME/clawd/memory/ainews-seen.txt}" + +CURL="curl -skL" + +# Ensure seen file exists +mkdir -p "$(dirname "$SEEN_FILE")" +touch "$SEEN_FILE" + +usage() { + cat <<EOF +Usage: ainews <command> [args] + +Commands: + items [max] NEW items only (unseen), title + source + URL + items --all [max] All items including seen + article <url> Fetch article content via fivefilters proxy + articles <url1,url2,...> Fetch multiple articles + mark as seen + seen Show seen count and recent entries + reset Clear seen history +EOF +} + +mark_seen() { + local url="$1" + if ! 
grep -qF "$url" "$SEEN_FILE" 2>/dev/null; then + echo "$url" >> "$SEEN_FILE" + fi +} + +prune_seen() { + local count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ') + if [ "$count" -gt 200 ]; then + tail -200 "$SEEN_FILE" > "${SEEN_FILE}.tmp" && mv "${SEEN_FILE}.tmp" "$SEEN_FILE" + fi +} + +is_seen() { + grep -qF "$1" "$SEEN_FILE" 2>/dev/null +} + +decode_entities() { + sed 's/&amp;/\&/g; s/&#38;/\&/g; s/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&#39;/'"'"'/g; s/<[^>]*>//g' +} + +fetch_simon_willison() { + $CURL "https://simonwillison.net/atom/everything/" 2>/dev/null | \ + tr '\n' ' ' | sed 's/<entry>/\n<entry>/g' | grep '<entry>' | head -"${1:-20}" | \ + while read -r entry; do + title=$(echo "$entry" | grep -oP '(?<=<title>)[^<]+' | head -1) + url=$(echo "$entry" | grep -oP '(?<=<link href=")[^"]+(?=" rel="alternate")' | head -1) + [ -n "$url" ] && [ -n "$title" ] && printf '%s\t[Willison]\t%s\n' "$(echo "$title" | decode_entities)" "$url" + done +} + +fetch_openai() { + $CURL "https://openai.com/news/rss.xml" 2>/dev/null | \ + tr '\n' ' ' | sed 's/<item>/\n<item>/g' | grep '<item>' | head -"${1:-10}" | \ + while read -r item; do + title=$(echo "$item" | grep -oP '(?<=<title>)<!\[CDATA\[\K[^\]]+' | head -1) + url=$(echo "$item" | grep -oP '(?<=<link>)[^<]+' | head -1) + [ -n "$url" ] && [ -n "$title" ] && printf '%s\t[OpenAI]\t%s\n' "$title" "$url" + done +} + +fetch_raschka() { + $CURL "https://magazine.sebastianraschka.com/feed" 2>/dev/null | \ + tr '\n' ' ' | sed 's/<item>/\n<item>/g' | grep '<item>' | head -"${1:-10}" | \ + while read -r item; do + title=$(echo "$item" | grep -oP '(?<=<title>)<!\[CDATA\[\K[^\]]+' | head -1) + url=$(echo "$item" | grep -oP '(?<=<link>)[^<]+' | head -1) + [ -n "$url" ] && [ -n "$title" ] && printf '%s\t[Raschka]\t%s\n' "$title" "$url" + done +} + +fetch_single_article() { + local url="$1" + local encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g') + $CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" | \ 
perl -0777 -ne 'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' | \ + sed 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&amp;/\&/g' | \ + sed 's/<[^>]*>//g' | \ + tr '\n' ' ' | sed 's/ */ /g' +} + +case "${1:-}" in + items) + show_all=false + max=50 + shift || true + if [ "${1:-}" = "--all" ]; then + show_all=true + shift || true + fi + [ -n "${1:-}" ] && max="$1" + + # Collect all items from all feeds + { + fetch_simon_willison "$max" + fetch_openai "$max" + fetch_raschka "$max" + } | while IFS=$'\t' read -r title source url; do + if $show_all || ! is_seen "$url"; then + printf '%s\t%s\t%s\n' "$title" "$source" "$url" + fi + mark_seen "$url" + done + + prune_seen + ;; + article) + [ -z "${2:-}" ] && { echo "Usage: ainews article <url>"; exit 1; } + fetch_single_article "$2" + ;; + articles) + [ -z "${2:-}" ] && { echo "Usage: ainews articles <url1,url2,...>"; exit 1; } + IFS=',' read -ra URLS <<< "$2" + for url in "${URLS[@]}"; do + title=$(echo "$url" | grep -oP '[^/]+$' | sed 's/-/ /g; s/\..*//; s/.*/\u&/') + echo "=== ${title} ===" + fetch_single_article "$url" + echo "" + echo "" + mark_seen "$url" + done + prune_seen + ;; + seen) + count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ') + echo "Seen: $count URLs" + if [ "$count" -gt 0 ]; then + echo "Recent:" + tail -5 "$SEEN_FILE" + fi + ;; + reset) + > "$SEEN_FILE" + echo "Cleared seen history" + ;; + *) + usage + ;; +esac diff --git a/memory/ai-news-seen.json b/memory/ai-news-seen.json deleted file mode 100644 index 4524ed2..0000000 --- a/memory/ai-news-seen.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "urls": [ - "https://openai.com/index/sora-feed-philosophy", - "https://openai.com/index/snowflake-partnership", - "https://magazine.sebastianraschka.com/p/categories-of-inference-time-scaling", - "https://simonwillison.net/2026/Feb/3/january/", - "https://simonwillison.net/2026/Feb/3/brandon-sanderson/", - "https://openai.com/index/taisei", - "https://openai.com/index/the-next-chapter-for-ai-in-the-eu", - 
"https://venturebeat.com/infrastructure/railway-secures-usd100-million-to-challenge-aws-with-ai-native-cloud", - "https://venturebeat.com/infrastructure/claude-code-costs-up-to-usd200-a-month-goose-does-the-same-thing-for-free", - "https://simonwillison.net/2026/Feb/2/introducing-the-codex-app/", - "https://simonwillison.net/2026/Feb/2/no-humans-allowed/", - "https://openai.com/index/introducing-the-codex-app", - "https://openai.com/index/retiring-gpt-4o-and-older-models", - "https://simonwillison.net/2026/Feb/1/openclaw-in-docker/", - "https://magazine.sebastianraschka.com/p/state-of-llms-2025", - "https://openai.com/index/inside-our-in-house-data-agent", - "https://openai.com/index/introducing-gpt-5-2-codex", - "https://openai.com/index/unrolling-the-codex-agent-loop", - "https://simonwillison.net/2026/Jan/31/andrej-karpathy/", - "https://simonwillison.net/2026/Jan/30/steve-yegge/", - "https://simonwillison.net/2026/Jan/30/moltbook/", - "https://openai.com/index/introducing-prism", - "https://openai.com/index/scaling-postgresql", - "https://openai.com/index/praktika", - "https://openai.com/index/gpt-5-2-for-science-and-math", - "https://openai.com/index/disney-sora-agreement", - "https://openai.com/index/ten-years" - ], - "lastUpdated": "2026-02-03T21:05:00Z" -} diff --git a/memory/ainews-seen.txt b/memory/ainews-seen.txt new file mode 100644 index 0000000..6e9215f --- /dev/null +++ b/memory/ainews-seen.txt @@ -0,0 +1,15 @@ +https://simonwillison.net/2026/Feb/3/january/#atom-everything +https://simonwillison.net/2026/Feb/3/brandon-sanderson/#atom-everything +https://simonwillison.net/2026/Feb/2/introducing-the-codex-app/#atom-everything +https://simonwillison.net/2026/Feb/2/no-humans-allowed/#atom-everything +https://simonwillison.net/2026/Feb/1/openclaw-in-docker/#atom-everything +https://openai.com/index/sora-feed-philosophy +https://openai.com/index/snowflake-partnership +https://openai.com/index/introducing-the-codex-app 
+https://openai.com/index/inside-our-in-house-data-agent +https://openai.com/index/retiring-gpt-4o-and-older-models +https://magazine.sebastianraschka.com/p/categories-of-inference-time-scaling +https://magazine.sebastianraschka.com/p/state-of-llms-2025 +https://magazine.sebastianraschka.com/p/llm-research-papers-2025-part2 +https://magazine.sebastianraschka.com/p/technical-deepseek +https://magazine.sebastianraschka.com/p/beyond-standard-llms