ainews: add AI news helper script with seen tracking
- Aggregates the Simon Willison, OpenAI Blog, and Sebastian Raschka feeds
- Same workflow as derstandard: `items` marks entries as seen, auto-prunes at 200
- Updated all 4 AI news cron jobs to use the new script
- Removed obsolete ai-news-seen.json
This commit is contained in:
parent
d0fa82ccc6
commit
e6248879b3
4 changed files with 186 additions and 32 deletions
147
bin/ainews
Executable file
147
bin/ainews
Executable file
|
|
@ -0,0 +1,147 @@
|
|||
#!/bin/bash
# AI News RSS helper - aggregates multiple AI-focused feeds
#
# Environment:
#   AINEWS_SEEN_FILE  Path of the seen-URL list (one URL per line).
#                     Defaults to $HOME/clawd/memory/ainews-seen.txt.
set -e

# Base URL of the fivefilters proxy used to fetch article content.
FIVEFILTERS_URL="https://fivefilters.cloonar.com"
SEEN_FILE="${AINEWS_SEEN_FILE:-$HOME/clawd/memory/ainews-seen.txt}"

# Command plus flags; callers expand it unquoted ($CURL <url>) so it word-splits.
# NOTE(review): -k turns off TLS certificate verification for every request,
# including the public feeds - confirm this is still required.
CURL="curl -skL"

# Ensure seen file exists
mkdir -p -- "$(dirname -- "$SEEN_FILE")"
touch -- "$SEEN_FILE"
|
||||
|
||||
#######################################
# Print the command summary.
# Outputs: help text on stdout.
#######################################
usage() {
  # Quoted delimiter: the help text is literal, nothing in it is expanded.
  cat <<'EOF'
Usage: ainews <command> [args]

Commands:
  items [max]               NEW items only (unseen), title + source + URL
  items --all [max]         All items including seen
  article <url>             Fetch article content via fivefilters proxy
  articles <url1,url2,...>  Fetch multiple articles + mark as seen
  seen                      Show seen count and recent entries
  reset                     Clear seen history
EOF
}
|
||||
|
||||
#######################################
# Record a URL in the seen list unless it is already present.
# Globals:   SEEN_FILE (appended to)
# Arguments: $1 - URL to record; an empty value is ignored
#######################################
mark_seen() {
  local url="$1"

  # Nothing to record; avoids writing blank lines into the list.
  [ -n "$url" ] || return 0

  # -x: compare whole lines. A plain -F substring match would treat
  # ".../post-1" as already stored once ".../post-10" is in the file.
  if ! grep -qxF -- "$url" "$SEEN_FILE" 2>/dev/null; then
    printf '%s\n' "$url" >> "$SEEN_FILE"
  fi
}
|
||||
|
||||
#######################################
# Trim the seen list to its most recent entries.
# Globals:   SEEN_FILE (rewritten when it is longer than the limit)
# Arguments: $1 - number of lines to keep (optional, default 200)
#######################################
prune_seen() {
  local limit="${1:-200}"
  local count

  # No list yet means nothing to prune.
  [ -f "$SEEN_FILE" ] || return 0

  count=$(wc -l < "$SEEN_FILE" | tr -d ' ')
  if [ "$count" -gt "$limit" ]; then
    # Write the tail to a sibling file first so the list is swapped in one mv.
    tail -n "$limit" -- "$SEEN_FILE" > "${SEEN_FILE}.tmp" \
      && mv -- "${SEEN_FILE}.tmp" "$SEEN_FILE"
  fi
}
|
||||
|
||||
#######################################
# Test whether a URL is already in the seen list.
# Globals:   SEEN_FILE (read)
# Arguments: $1 - URL to look up
# Returns:   0 if the exact URL is listed, non-zero otherwise
#######################################
is_seen() {
  # -x: whole-line comparison, so a URL that is merely a prefix or substring
  # of a stored URL does not count as seen.
  grep -qxF -- "$1" "$SEEN_FILE" 2>/dev/null
}
|
||||
|
||||
#######################################
# Decode common HTML entities and strip CDATA wrappers.
# Inputs:  text on stdin
# Outputs: decoded text on stdout
#######################################
decode_entities() {
  # One rule per -e keeps the quoting simple (the apostrophe rule needs
  # double quotes). Ampersands go first, double-encoded form before the
  # single one, so "&amp;lt;" also ends up as "<".
  sed \
    -e 's/&amp;amp;/\&/g' \
    -e 's/&amp;/\&/g' \
    -e 's/&lt;/</g' \
    -e 's/&gt;/>/g' \
    -e 's/&quot;/"/g' \
    -e "s/&#39;/'/g" \
    -e 's/<!\[CDATA\[//g' \
    -e 's/\]\]>//g'
}
|
||||
|
||||
#######################################
# List entries from Simon Willison's Atom feed.
# Globals:   CURL (read) - command plus flags, expanded unquoted on purpose
# Arguments: $1 - maximum number of entries to read (optional, default 20)
# Outputs:   one "title<TAB>[Willison]<TAB>url" line per entry on stdout
# Returns:   0 even when entries are skipped for lacking a title or link
#######################################
fetch_simon_willison() {
  local limit="${1:-20}"
  local entry title url

  # Flatten the feed to one line, then start a new line at every <entry> so
  # each loop iteration sees exactly one entry.
  # shellcheck disable=SC2086 -- $CURL must word-split into command + flags.
  $CURL "https://simonwillison.net/atom/everything/" 2>/dev/null \
    | tr '\n' ' ' \
    | sed 's/<entry>/\n<entry>/g' \
    | grep '<entry>' \
    | head -n "$limit" \
    | while IFS= read -r entry; do
        title=$(printf '%s\n' "$entry" | grep -oP '(?<=<title>)[^<]+' | head -n 1)
        url=$(printf '%s\n' "$entry" \
          | grep -oP '(?<=<link href=")[^"]+(?=" rel="alternate")' | head -n 1)
        # An if (not a trailing && chain) keeps the loop status at 0 when the
        # last entry is skipped; otherwise set -e would abort the caller.
        if [ -n "$url" ] && [ -n "$title" ]; then
          printf '%s\t[Willison]\t%s\n' \
            "$(printf '%s\n' "$title" | decode_entities)" "$url"
        fi
      done
}
|
||||
|
||||
#######################################
# List items from the OpenAI news RSS feed.
# Globals:   CURL (read) - command plus flags, expanded unquoted on purpose
# Arguments: $1 - maximum number of items to read (optional, default 10)
# Outputs:   one "title<TAB>[OpenAI]<TAB>url" line per item on stdout
# Returns:   0 even when items are skipped for lacking a title or link
#######################################
fetch_openai() {
  local limit="${1:-10}"
  local item title url

  # Flatten the feed to one line, then start a new line at every <item>.
  # shellcheck disable=SC2086 -- $CURL must word-split into command + flags.
  $CURL "https://openai.com/news/rss.xml" 2>/dev/null \
    | tr '\n' ' ' \
    | sed 's/<item>/\n<item>/g' \
    | grep '<item>' \
    | head -n "$limit" \
    | while IFS= read -r item; do
        # Only CDATA-wrapped titles are recognised. Match lazily up to the
        # closing "]]>" so a "]" inside the title does not cut it short.
        title=$(printf '%s\n' "$item" \
          | grep -oP '<title><!\[CDATA\[\K.*?(?=\]\]>)' | head -n 1)
        url=$(printf '%s\n' "$item" | grep -oP '(?<=<link>)[^<]+' | head -n 1)
        # An if (not a trailing && chain) keeps the loop status at 0 when the
        # last item is skipped; otherwise set -e would abort the caller.
        if [ -n "$url" ] && [ -n "$title" ]; then
          printf '%s\t[OpenAI]\t%s\n' "$title" "$url"
        fi
      done
}
|
||||
|
||||
#######################################
# List items from Sebastian Raschka's magazine RSS feed.
# Globals:   CURL (read) - command plus flags, expanded unquoted on purpose
# Arguments: $1 - maximum number of items to read (optional, default 10)
# Outputs:   one "title<TAB>[Raschka]<TAB>url" line per item on stdout
# Returns:   0 even when items are skipped for lacking a title or link
#######################################
fetch_raschka() {
  local limit="${1:-10}"
  local item title url

  # Flatten the feed to one line, then start a new line at every <item>.
  # shellcheck disable=SC2086 -- $CURL must word-split into command + flags.
  $CURL "https://magazine.sebastianraschka.com/feed" 2>/dev/null \
    | tr '\n' ' ' \
    | sed 's/<item>/\n<item>/g' \
    | grep '<item>' \
    | head -n "$limit" \
    | while IFS= read -r item; do
        # Only CDATA-wrapped titles are recognised. Match lazily up to the
        # closing "]]>" so a "]" inside the title does not cut it short.
        title=$(printf '%s\n' "$item" \
          | grep -oP '<title><!\[CDATA\[\K.*?(?=\]\]>)' | head -n 1)
        url=$(printf '%s\n' "$item" | grep -oP '(?<=<link>)[^<]+' | head -n 1)
        # An if (not a trailing && chain) keeps the loop status at 0 when the
        # last item is skipped; otherwise set -e would abort the caller.
        if [ -n "$url" ] && [ -n "$title" ]; then
          printf '%s\t[Raschka]\t%s\n' "$title" "$url"
        fi
      done
}
|
||||
|
||||
fetch_single_article() {
|
||||
local url="$1"
|
||||
local encoded_url=$(printf '%s' "$url" | sed 's/:/%3A/g; s/\//%2F/g; s/\?/%3F/g; s/&/%26/g; s/=/%3D/g')
|
||||
$CURL "${FIVEFILTERS_URL}/makefulltextfeed.php?url=${encoded_url}&max=1&links=preserve&exc=" | \
|
||||
perl -0777 -ne 'print $1 if /<item>.*?<description>(.*?)<\/description>.*?<\/item>/s' | \
|
||||
sed 's/</</g; s/>/>/g; s/"/"/g; s/&/\&/g' | \
|
||||
sed 's/<[^>]*>//g' | \
|
||||
tr '\n' ' ' | sed 's/ */ /g'
|
||||
}
|
||||
|
||||
#######################################
# Command dispatcher.
# Globals:   SEEN_FILE (read; truncated by "reset")
# Arguments: the script's command line: <command> [args]
# Outputs:   command results on stdout, usage errors on stderr
# Returns:   exits 1 on a missing or invalid argument
#######################################
main() {
  local show_all max title source url count
  local -a URLS

  case "${1:-}" in
    items)
      show_all=false
      max=50
      shift || true
      if [ "${1:-}" = "--all" ]; then
        show_all=true
        shift || true
      fi
      if [ -n "${1:-}" ]; then
        max="$1"
      fi
      # max ends up as a line count for head, so reject anything else here
      # rather than letting every feed fail with its own error.
      case "$max" in
        *[!0-9]*)
          printf 'ainews: max must be a non-negative integer: %s\n' "$max" >&2
          exit 1
          ;;
      esac

      # Collect all items from all feeds
      {
        fetch_simon_willison "$max"
        fetch_openai "$max"
        fetch_raschka "$max"
      } | while IFS=$'\t' read -r title source url; do
        if [ "$show_all" = true ] || ! is_seen "$url"; then
          printf '%s\t%s\t%s\n' "$title" "$source" "$url"
        fi
        # Every listed item is recorded, whether or not it was printed.
        mark_seen "$url"
      done

      prune_seen
      ;;
    article)
      if [ -z "${2:-}" ]; then
        echo "Usage: ainews article <url>" >&2
        exit 1
      fi
      fetch_single_article "$2"
      ;;
    articles)
      if [ -z "${2:-}" ]; then
        echo "Usage: ainews articles <url1,url2,...>" >&2
        exit 1
      fi
      IFS=',' read -ra URLS <<< "$2"
      for url in "${URLS[@]}"; do
        # "a,,b" or a trailing comma yields empty elements; skip them.
        [ -n "$url" ] || continue
        # Heading from the last path segment: dashes become spaces, anything
        # from the first dot is dropped, first letter upper-cased (GNU sed \u).
        title=$(printf '%s\n' "$url" | grep -oP '[^/]+$' \
          | sed 's/-/ /g; s/\..*//; s/.*/\u&/')
        echo "=== ${title} ==="
        fetch_single_article "$url"
        echo ""
        echo ""
        mark_seen "$url"
      done
      prune_seen
      ;;
    seen)
      count=$(wc -l < "$SEEN_FILE" 2>/dev/null | tr -d ' ')
      echo "Seen: $count URLs"
      if [ "$count" -gt 0 ]; then
        echo "Recent:"
        tail -n 5 -- "$SEEN_FILE"
      fi
      ;;
    reset)
      : > "$SEEN_FILE"
      echo "Cleared seen history"
      ;;
    *)
      usage
      ;;
  esac
}

main "$@"
|
||||
Loading…
Add table
Add a link
Reference in a new issue