Implement hybrid approach for AI news

- Update ainews script to detect OpenAI URLs and mark them as NEEDS_WEB_FETCH
- Update TOOLS.md with content availability table and hybrid workflow
- Update all 4 AI news cron jobs (10:05, 14:05, 18:05, 22:05) with hybrid instructions
- Simon/Raschka: use ainews articles (fivefilters works)
- OpenAI: use web_fetch tool (JS-heavy site)
parent e6248879b3
commit c7e2d429c0
5 changed files with 228 additions and 23 deletions
bin/ainews (12 changes)
@@ -20,8 +20,12 @@ Commands:
   items --all [max]         All items including seen
   article <url>             Fetch article content via fivefilters proxy
   articles <url1,url2,...>  Fetch multiple articles + mark as seen
+                            (OpenAI URLs print NEEDS_WEB_FETCH - use web_fetch tool)
   seen                      Show seen count and recent entries
   reset                     Clear seen history
+
+Note: OpenAI's site is JS-rendered, fivefilters can't extract it.
+For OpenAI articles, use the web_fetch tool directly.
 EOF
 }
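Since NEEDS_WEB_FETCH is emitted as plain text on stdout, a caller (such as one of the cron jobs) can filter for the marker and collect the URLs that have to go through web_fetch instead. A minimal sketch; the helper name `collect_web_fetch_urls` and the sample output are illustrative assumptions, not part of `bin/ainews`:

```shell
#!/usr/bin/env bash
# Hypothetical consumer of the NEEDS_WEB_FETCH marker: scan the output of
# `ainews articles ...` and print only the URLs flagged for web_fetch.
# Uses GNU grep's -P (PCRE) and \K, which bin/ainews already relies on.
collect_web_fetch_urls() {
    grep -oP '^NEEDS_WEB_FETCH: \K.*'
}

# Assumed sample of what the articles command prints for an OpenAI URL.
sample_output="=== Hello gpt 4o ===
NEEDS_WEB_FETCH: https://openai.com/index/hello-gpt-4o
(OpenAI's site is JS-rendered - use web_fetch tool instead)"

collect_web_fetch_urls <<<"$sample_output"
# -> https://openai.com/index/hello-gpt-4o
```

Fivefilters-friendly articles still print their content inline, so the hybrid split falls out of a single grep over the combined output.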
@@ -122,7 +126,13 @@ case "${1:-}" in
         for url in "${URLS[@]}"; do
             title=$(echo "$url" | grep -oP '[^/]+$' | sed 's/-/ /g; s/\..*//; s/.*/\u&/')
             echo "=== ${title} ==="
-            fetch_single_article "$url"
+            # OpenAI is JS-heavy, fivefilters can't extract it - needs web_fetch tool
+            if [[ "$url" == *"openai.com"* ]]; then
+                echo "NEEDS_WEB_FETCH: $url"
+                echo "(OpenAI's site is JS-rendered - use web_fetch tool instead)"
+            else
+                fetch_single_article "$url"
+            fi
             echo ""
             echo ""
             mark_seen "$url"
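The `title=$(...)` pipeline above derives a display title from the URL slug: take the last path component, replace hyphens with spaces, strip any file extension, and capitalise the first letter (GNU sed's `\u`). A standalone sketch of the same pipeline; the function name `slug_to_title` and the example URLs are assumptions for illustration:

```shell
#!/usr/bin/env bash
# Hypothetical standalone reproduction of the title-derivation pipeline
# from bin/ainews. Requires GNU grep (-P) and GNU sed (\u).
slug_to_title() {
    # Last path component -> hyphens to spaces -> drop extension -> capitalise.
    echo "$1" | grep -oP '[^/]+$' | sed 's/-/ /g; s/\..*//; s/.*/\u&/'
}

slug_to_title "https://openai.com/index/hello-gpt-4o"        # -> "Hello gpt 4o"
slug_to_title "https://example.com/posts/my-first-post.html" # -> "My first post"
```

Note the pipeline only uppercases the first character of the whole string, so multi-word slugs come out sentence-cased rather than title-cased.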