#!/bin/bash
# AI News RSS helper - aggregates multiple AI-focused feeds.
#
# Environment:
#   AINEWS_SEEN_FILE  Path of the seen-URL history file
#                     (default: $HOME/clawd/memory/ainews-seen.txt).
#   AINEWS_SEEN_MAX   Number of history entries kept when pruning
#                     (default: 200).
#
# pipefail is deliberately NOT enabled: the feed pipelines pass through
# `head`, which closes its input early and would make upstream stages
# report failure.
set -e

FIVEFILTERS_URL="https://fivefilters.cloonar.com"
SEEN_FILE="${AINEWS_SEEN_FILE:-$HOME/clawd/memory/ainews-seen.txt}"
SEEN_MAX="${AINEWS_SEEN_MAX:-200}"
# NOTE(review): -k disables TLS certificate verification. It is kept for
# compatibility with the existing behavior - confirm it is still required.
CURL=(curl -skL)

# Print a message on stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the command overview on stdout.
usage() {
  cat <<'EOF'
Usage: ainews <command> [args]

Commands:
  items [max]               NEW items only (unseen), title + source + URL
  items --all [max]         All items including seen
  article <url>             Fetch article content via fivefilters proxy
  articles <url1,url2,...>  Fetch multiple articles + mark as seen
  seen                      Show seen count and recent entries
  reset                     Clear seen history
EOF
}

# Create the history file (and its directory) if it does not exist yet.
ensure_seen_file() {
  mkdir -p -- "$(dirname -- "$SEEN_FILE")"
  touch -- "$SEEN_FILE"
}

# Succeed when the exact URL $1 is already recorded in the history file.
# -x forces whole-line matching so that a URL which is merely a prefix or
# substring of a recorded URL is not reported as seen.
is_seen() {
  grep -qxF -- "$1" "$SEEN_FILE" 2>/dev/null
}

# Append URL $1 to the history file unless it is empty or already recorded.
mark_seen() {
  local url=$1
  [[ -n $url ]] || return 0
  if ! is_seen "$url"; then
    printf '%s\n' "$url" >>"$SEEN_FILE"
  fi
}

# Trim the history file to its newest $SEEN_MAX entries.
prune_seen() {
  [[ -f $SEEN_FILE ]] || return 0
  local count
  count=$(wc -l <"$SEEN_FILE")
  count=${count//[[:space:]]/}
  if ((${count:-0} > SEEN_MAX)); then
    tail -n "$SEEN_MAX" -- "$SEEN_FILE" >"${SEEN_FILE}.tmp" \
      && mv -- "${SEEN_FILE}.tmp" "$SEEN_FILE"
  fi
}

# Filter: decode the common HTML/XML character entities read from stdin.
# &amp; is decoded last so that "&amp;lt;" becomes "&lt;" and not "<".
decode_entities() {
  sed \
    -e 's/&lt;/</g' \
    -e 's/&gt;/>/g' \
    -e 's/&quot;/"/g' \
    -e "s/&#39;/'/g" \
    -e "s/&#x27;/'/g" \
    -e "s/&apos;/'/g" \
    -e 's/&#38;/\&/g' \
    -e 's/&amp;/\&/g'
}

# Percent-encode $1 for use as a URL query value. Every byte except the
# RFC 3986 unreserved set (A-Z a-z 0-9 - . _ ~) is encoded, so characters
# such as '%', '#', '+' and spaces survive the round trip.
urlencode() {
  local LC_ALL=C # iterate byte-wise, not per multibyte character
  local input=$1 encoded='' ch hex i
  for ((i = 0; i < ${#input}; i++)); do
    ch=${input:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-])
        encoded+=$ch
        ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

# Derive a display title from the last path segment of URL $1:
# query/fragment and extension are dropped, dashes become spaces and the
# first letter is capitalized. A trailing slash is ignored.
title_from_url() {
  local slug=$1
  slug=${slug%%[?#]*}
  slug=${slug%/}
  slug=${slug##*/}
  slug=${slug//-/ }
  slug=${slug%%.*}
  printf '%s\n' "${slug^}"
}

# Read an Atom document on stdin and print up to $2 entries as
# "title<TAB>$1<TAB>url". Entries lacking a title or an alternate link are
# skipped; the function still returns 0 in that case so that callers
# running under `set -e` are not aborted.
parse_atom_entries() {
  local label=$1 max=$2 entry title url
  tr '\n' ' ' \
    | sed 's/<entry>/\n<entry>/g' \
    | grep '<entry>' \
    | head -n "$max" \
    | while IFS= read -r entry; do
      title=$(printf '%s\n' "$entry" \
        | grep -oP '<title[^>]*>\K[^<]+' | head -n 1)
      url=$(printf '%s\n' "$entry" \
        | grep -oP '(?<=<link href=")[^"]+(?=" rel="alternate")' | head -n 1)
      if [[ -n $url && -n $title ]]; then
        printf '%s\t%s\t%s\n' \
          "$(printf '%s\n' "$title" | decode_entities)" "$label" "$url"
      fi
    done
}

# Read an RSS document on stdin and print up to $2 items as
# "title<TAB>$1<TAB>url". The title may be wrapped in CDATA; it is taken
# up to the closing "]]>" / "</title>", so a ']' inside a title is kept.
# Items lacking a title or link are skipped without a failing status.
parse_rss_items() {
  local label=$1 max=$2 item title url
  tr '\n' ' ' \
    | sed 's/<item>/\n<item>/g' \
    | grep '<item>' \
    | head -n "$max" \
    | while IFS= read -r item; do
      title=$(printf '%s\n' "$item" \
        | grep -oP '<title>(?:<!\[CDATA\[)?\K.*?(?=(?:\]\]>)?</title>)' \
        | head -n 1)
      url=$(printf '%s\n' "$item" | grep -oP '(?<=<link>)[^<]+' | head -n 1)
      if [[ -n $url && -n $title ]]; then
        printf '%s\t%s\t%s\n' "$title" "$label" "$url"
      fi
    done
}

# Print up to $1 (default 20) entries of Simon Willison's Atom feed.
fetch_simon_willison() {
  "${CURL[@]}" "https://simonwillison.net/atom/everything/" 2>/dev/null \
    | parse_atom_entries '[Willison]' "${1:-20}"
}

# Print up to $1 (default 10) items of the OpenAI news RSS feed.
fetch_openai() {
  "${CURL[@]}" "https://openai.com/news/rss.xml" 2>/dev/null \
    | parse_rss_items '[OpenAI]' "${1:-10}"
}

# Print up to $1 (default 10) items of Sebastian Raschka's RSS feed.
fetch_raschka() {
  "${CURL[@]}" "https://magazine.sebastianraschka.com/feed" 2>/dev/null \
    | parse_rss_items '[Raschka]' "${1:-10}"
}

# Fetch URL $1 through the fivefilters full-text proxy and print the first
# item's description as plain text on a single line (entities decoded,
# tags removed, runs of spaces collapsed). No trailing newline is written.
fetch_single_article() {
  local url=$1 encoded_url
  encoded_url=$(urlencode "$url")
  "${CURL[@]}" \
    "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" \
    | perl -0777 -ne \
      'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' \
    | sed 's/&lt;/</g; s/&gt;/>/g; s/&quot;/"/g; s/&amp;/\&/g' \
    | sed 's/<[^>]*>//g' \
    | tr '\n' ' ' \
    | sed 's/  */ /g'
}

# items [--all] [max]: list feed items as "title<TAB>source<TAB>url".
# Without --all only unseen items are printed. Every listed URL is recorded
# as seen afterwards. max (default 50) is applied to each feed separately.
cmd_items() {
  local show_all=false max=50
  if [[ ${1:-} == --all ]]; then
    show_all=true
    shift
  fi
  if [[ -n ${1:-} ]]; then
    max=$1
  fi
  [[ $max =~ ^[1-9][0-9]*$ ]] \
    || die "items: max must be a positive integer, got '$max'"

  ensure_seen_file
  local title label url
  {
    fetch_simon_willison "$max"
    fetch_openai "$max"
    fetch_raschka "$max"
  } | while IFS=$'\t' read -r title label url; do
    [[ -n $url ]] || continue
    if is_seen "$url"; then
      if [[ $show_all == true ]]; then
        printf '%s\t%s\t%s\n' "$title" "$label" "$url"
      fi
    else
      printf '%s\t%s\t%s\n' "$title" "$label" "$url"
      printf '%s\n' "$url" >>"$SEEN_FILE"
    fi
  done
  prune_seen
}

# article <url>: print the plain-text content of one article.
cmd_article() {
  [[ -n ${1:-} ]] || die "Usage: ainews article <url>"
  fetch_single_article "$1"
}

# articles <url1,url2,...>: print each article under a "=== Title ==="
# header and record its URL as seen.
cmd_articles() {
  [[ -n ${1:-} ]] || die "Usage: ainews articles <url1,url2,...>"
  ensure_seen_file
  local -a urls
  local url
  IFS=',' read -ra urls <<<"$1"
  for url in "${urls[@]}"; do
    [[ -n $url ]] || continue
    echo "=== $(title_from_url "$url") ==="
    fetch_single_article "$url"
    echo ""
    echo ""
    mark_seen "$url"
  done
  prune_seen
}

# seen: print the number of recorded URLs and the five newest ones.
cmd_seen() {
  ensure_seen_file
  local count
  count=$(wc -l <"$SEEN_FILE")
  count=${count//[[:space:]]/}
  echo "Seen: $count URLs"
  if ((${count:-0} > 0)); then
    echo "Recent:"
    tail -n 5 -- "$SEEN_FILE"
  fi
}

# reset: empty the history file.
cmd_reset() {
  ensure_seen_file
  : >"$SEEN_FILE"
  echo "Cleared seen history"
}

# Dispatch on the first argument; anything unknown prints the usage text.
main() {
  case "${1:-}" in
    items)
      shift
      cmd_items "$@"
      ;;
    article)
      cmd_article "${2:-}"
      ;;
    articles)
      cmd_articles "${2:-}"
      ;;
    seen)
      cmd_seen
      ;;
    reset)
      cmd_reset
      ;;
    *)
      usage
      ;;
  esac
}

main "$@"