Add modular capture backends, resolution profiles, Wayland support

This commit is contained in:
Hoid 2026-02-14 10:00:19 +00:00
parent 3b363192f2
commit 80ba9b1b90
16 changed files with 2266 additions and 45 deletions

View file

@ -26,7 +26,7 @@ plugins/
├── routines/ Farming routines (Mephisto, Pindle, Countess)
├── loot/ Default loot filter rules (YAML)
└── templates/ UI template images
web/ React dashboard (future)
web/ SolidJS dashboard (future)
config/ YAML configuration
```
@ -66,7 +66,7 @@ go build -o iso-bot ./cmd/iso-bot
| Screen capture | Platform-native (Win32 / X11) |
| Input simulation | Platform-native (SendInput / uinput) |
| API | net/http + gorilla/websocket |
| Dashboard | React + TypeScript (planned) |
| Dashboard | SolidJS + TypeScript (planned) |
| Config | YAML |
| Loot filter | Declarative YAML rules |

89
config/d2r.yaml Normal file
View file

@ -0,0 +1,89 @@
game: d2r
capture:
backend: window_x11 # or window_win32, wayland, vnc, spice, monitor, file
window_title: "Diablo II: Resurrected"
# VNC settings (if backend is vnc)
# vnc_host: "192.168.1.100"
# vnc_port: 5900
# vnc_password: "secretpassword"
# SPICE settings (if backend is spice)
# spice_host: "192.168.1.100"
# spice_port: 5900
# spice_password: "secretpassword"
# Wayland settings (if backend is wayland)
# output: "DP-1" # Wayland output name
# use_pipewire: true
# File settings (if backend is file)
# path: "/path/to/images/or/video"
# type: "image" # or "video", "sequence", "directory"
# frame_rate: 30.0
# loop: true
resolution: "1920x1080" # must match a supported profile (1920x1080 or 1280x720)
routine: mephisto
safety:
session_min_hours: 1.0
session_max_hours: 4.0
break_min_minutes: 10
break_max_minutes: 45
max_daily_hours: 12
# Loot filter settings
loot:
pickup_uniques: true
pickup_sets: true
pickup_rares: true
pickup_runes: true
min_rune_tier: 10 # El=1, Eld=2, ..., Zod=33
pickup_gems: false
# Safety thresholds (0.0 - 1.0)
health_potion_threshold: 0.5
mana_potion_threshold: 0.3
chicken_threshold: 0.2 # exit game if health below this
# Color detection ranges (HSV)
colors:
health_filled:
lower_h: 0
lower_s: 100
lower_v: 100
upper_h: 10
upper_s: 255
upper_v: 255
mana_filled:
lower_h: 100
lower_s: 100
lower_v: 100
upper_h: 130
upper_s: 255
upper_v: 255
item_unique:
lower_h: 15
lower_s: 100
lower_v: 180
upper_h: 30
upper_s: 255
upper_v: 255
# Game timing constants
timings:
loading_screen_max_ms: 15000
town_portal_cast_ms: 3500
teleport_delay_ms: 150
potion_cooldown_ms: 1000
pickup_delay_ms: 300
api:
addr: ":8080"
logging:
level: info
file: logs/bot.log

View file

@ -0,0 +1,375 @@
// File-based capture for testing and development.
package backends
import (
"fmt"
"image"
"image/gif"
"image/jpeg"
"image/png"
"os"
"path/filepath"
"strings"
"time"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// FileConfig holds configuration for file-based capture.
//
// NewFileSource fills this struct from a generic map using the same key
// names as the yaml tags below.
type FileConfig struct {
// Path is the file or directory path to read from.
Path string `yaml:"path"`
// Type specifies the input type: "image", "video", "sequence", "directory".
// NewFileSource stores "auto" when no type is given; initialize then
// replaces it with the result of detectType.
Type string `yaml:"type"`
// FrameRate for video playback or image sequence (frames per second).
// It is converted into the minimum interval between two Capture calls.
FrameRate float64 `yaml:"frame_rate"`
// Loop enables looping for video or image sequences.
Loop bool `yaml:"loop"`
// Pattern is the filename pattern for image sequences (e.g., "frame_%04d.png").
// It is expanded with fmt.Sprintf and the frame number.
Pattern string `yaml:"pattern"`
// StartFrame for image sequences or video (0-based).
StartFrame int `yaml:"start_frame"`
// EndFrame for image sequences or video (-1 for end of sequence).
EndFrame int `yaml:"end_frame"`
}
// FileSource provides capture from static images, image sequences, or video files.
//
// All frames are decoded up front by initialize and kept in memory; Capture
// then hands them out one at a time.
type FileSource struct {
// config is the configuration this source was built from.
config FileConfig
// currentFrame is the index into frameData of the next frame to return.
currentFrame int
// totalFrames is the number of frames loaded into frameData.
totalFrames int
// frameData holds every decoded frame.
frameData []image.Image
// lastCapture is when Capture last returned a frame; zero before the first call.
lastCapture time.Time
// frameInterval is the minimum time between two frames, derived from FrameRate.
frameInterval time.Duration
// width and height are the dimensions of the first loaded frame.
width int
height int
}
// NewFileSource creates a file-based capture source.
//
// configMap is a loosely typed configuration map. Recognised keys:
//
//	path        string  required, file or directory to read
//	type        string  "image", "sequence", "directory", "video"; default "auto"
//	frame_rate  number  frames per second, must be > 0; default 30
//	loop        bool    restart at the first frame after the last; default true
//	pattern     string  printf-style filename pattern for sequences
//	start_frame int     first frame, must be >= 0; default 0
//	end_frame   int     last frame, -1 for "until the end"; default -1
//
// Keys with an unexpected type are ignored and keep their default. All frames
// are loaded before the function returns, so a missing or undecodable input
// is reported here rather than on the first Capture call.
func NewFileSource(configMap map[string]interface{}) (capture.Source, error) {
	path, ok := configMap["path"].(string)
	if !ok || path == "" {
		return nil, fmt.Errorf("file path is required")
	}

	config := FileConfig{
		Path:      path,
		Type:      "auto", // resolved from the path by initialize
		FrameRate: 30.0,
		Loop:      true,
		EndFrame:  -1,
	}

	if fileType, ok := configMap["type"].(string); ok {
		config.Type = fileType
	}
	// A whole number such as `frame_rate: 30` typically arrives as an int
	// rather than a float64, so accept both instead of silently falling back
	// to the default.
	switch rate := configMap["frame_rate"].(type) {
	case float64:
		config.FrameRate = rate
	case int:
		config.FrameRate = float64(rate)
	}
	if loop, ok := configMap["loop"].(bool); ok {
		config.Loop = loop
	}
	if pattern, ok := configMap["pattern"].(string); ok {
		config.Pattern = pattern
	}
	if startFrame, ok := configMap["start_frame"].(int); ok {
		config.StartFrame = startFrame
	}
	if endFrame, ok := configMap["end_frame"].(int); ok {
		config.EndFrame = endFrame
	}

	// A zero, negative or NaN rate would make the interval division below
	// meaningless, so reject it up front.
	if !(config.FrameRate > 0) {
		return nil, fmt.Errorf("frame_rate must be positive, got %v", config.FrameRate)
	}
	// StartFrame is used as a slice index during playback.
	if config.StartFrame < 0 {
		return nil, fmt.Errorf("start_frame must not be negative, got %d", config.StartFrame)
	}

	source := &FileSource{
		config:        config,
		currentFrame:  config.StartFrame,
		frameInterval: time.Duration(float64(time.Second) / config.FrameRate),
	}

	// Load file(s) and initialize.
	if err := source.initialize(); err != nil {
		return nil, fmt.Errorf("failed to initialize file source: %w", err)
	}
	return source, nil
}
// Name identifies this source by the last element of its configured path,
// prefixed with "File: ".
func (f *FileSource) Name() string {
	base := filepath.Base(f.config.Path)
	return fmt.Sprintf("File: %s", base)
}
// Capture returns the next frame of the loaded sequence.
//
// Playback is limited to the window [StartFrame, EndFrame] of the loaded
// frames. A negative EndFrame means "up to the last loaded frame". Window
// limits that fall outside the loaded frames are clamped: an EndFrame past
// the end is treated as the last frame, and a StartFrame past the window
// falls back to the first frame, so a mismatch never indexes out of range.
//
// When the window is exhausted the source either restarts at its first frame
// (Loop) or returns an error with a nil image. The last frame of a
// non-looping sequence is returned normally; only the call after it fails.
//
// Capture blocks as needed so that successive frames are at least
// frameInterval apart.
func (f *FileSource) Capture() (image.Image, error) {
	if len(f.frameData) == 0 {
		return nil, fmt.Errorf("no frames loaded")
	}

	// Work out the playable window, clamped to what is actually loaded.
	last := len(f.frameData) - 1
	if f.config.EndFrame >= 0 && f.config.EndFrame < last {
		last = f.config.EndFrame
	}
	first := f.config.StartFrame
	if first < 0 || first > last {
		first = 0
	}

	if f.currentFrame < first {
		f.currentFrame = first
	}
	if f.currentFrame > last {
		if !f.config.Loop {
			return nil, fmt.Errorf("end of sequence reached")
		}
		f.currentFrame = first
	}

	// Respect frame rate timing.
	if !f.lastCapture.IsZero() {
		if wait := f.frameInterval - time.Since(f.lastCapture); wait > 0 {
			time.Sleep(wait)
		}
	}
	f.lastCapture = time.Now()

	frame := f.frameData[f.currentFrame]
	f.currentFrame++
	return frame, nil
}
// CaptureRegion returns the part of the next frame covered by r.
//
// It obtains the frame through Capture, so it advances playback and is paced
// exactly like a full capture. A region that only partly overlaps the frame
// is clipped to the frame; a region that does not overlap it at all is an
// error. An error is also returned when the frame's concrete image type
// offers no SubImage method, instead of panicking on the type assertion.
func (f *FileSource) CaptureRegion(r capture.Region) (image.Image, error) {
	fullFrame, err := f.Capture()
	if err != nil {
		return nil, err
	}

	cropper, ok := fullFrame.(interface {
		SubImage(r image.Rectangle) image.Image
	})
	if !ok {
		return nil, fmt.Errorf("image type %T does not support cropping", fullFrame)
	}

	// Crop the image to the specified region, clipped to the frame.
	bounds := image.Rect(r.X, r.Y, r.X+r.Width, r.Y+r.Height)
	imgBounds := fullFrame.Bounds()
	if !bounds.In(imgBounds) {
		bounds = bounds.Intersect(imgBounds)
		if bounds.Empty() {
			return nil, fmt.Errorf("region is outside image bounds")
		}
	}
	return cropper.SubImage(bounds), nil
}
// Size reports the width and height recorded when the frames were loaded
// (the dimensions of the first frame).
func (f *FileSource) Size() (width, height int) {
	width, height = f.width, f.height
	return width, height
}
// Close releases file resources.
//
// It drops the reference to the decoded frames and always returns nil.
// After Close, Capture reports "no frames loaded".
func (f *FileSource) Close() error {
// Clear frame data to free memory
f.frameData = nil
return nil
}
// initialize resolves the input type and runs the matching loader, which
// fills frameData, totalFrames, width and height.
//
// A configured type of "auto" is replaced by the result of detectType and the
// resolved value is written back to the config. Unknown types are an error.
func (f *FileSource) initialize() error {
	kind := f.config.Type
	if kind == "auto" {
		kind = f.detectType()
		f.config.Type = kind
	}

	loaders := map[string]func() error{
		"image":     f.loadSingleImage,
		"sequence":  f.loadImageSequence,
		"directory": f.loadDirectory,
		"video":     f.loadVideo,
	}
	load, known := loaders[kind]
	if !known {
		return fmt.Errorf("unsupported file type: %s", kind)
	}
	return load()
}
// detectType guesses the input type from the configured path.
//
// Rules, in order:
//   - the path does not exist and contains '%' or '*': "sequence"
//     (a pattern such as "frame_%04d.png" never exists on disk itself, so
//     this has to be decided before the extension is looked at)
//   - the path does not exist otherwise: "image", so that the loader
//     reports the real open error
//   - the path is a directory: "directory"
//   - known image extension: "image"; known video extension: "video"
//   - unknown extension containing '%' or '*': "sequence"
//   - anything else: "image"
func (f *FileSource) detectType() string {
	path := f.config.Path
	looksLikePattern := strings.ContainsAny(path, "%*")

	info, err := os.Stat(path)
	if err != nil {
		if looksLikePattern {
			return "sequence"
		}
		return "image" // Default fallback
	}
	if info.IsDir() {
		return "directory"
	}

	switch strings.ToLower(filepath.Ext(path)) {
	case ".png", ".jpg", ".jpeg", ".gif", ".bmp":
		return "image"
	case ".mp4", ".avi", ".mov", ".mkv", ".webm":
		return "video"
	}
	if looksLikePattern {
		return "sequence"
	}
	return "image"
}
// loadSingleImage decodes the file at the configured path and installs it as
// the only frame of this source. Decoding errors are returned unchanged.
func (f *FileSource) loadSingleImage() error {
	img, err := f.loadImage(f.config.Path)
	if err != nil {
		return err
	}

	size := img.Bounds().Size()
	f.frameData = []image.Image{img}
	f.totalFrames = len(f.frameData)
	f.width, f.height = size.X, size.Y
	return nil
}
// loadImageSequence loads a numbered sequence of images.
//
// File names are produced by expanding the pattern with fmt.Sprintf and the
// frame number, counting up from StartFrame. Relative names are resolved
// against the directory of the configured path. Loading stops at EndFrame or,
// when EndFrame is negative, at the first number for which no file exists.
//
// If no explicit pattern is configured but the path itself contains a '%'
// verb, the last path element is used as the pattern.
//
// After loading, frameData[0] holds the file numbered StartFrame. The
// playback window in the config is therefore rebased to the loaded slice
// (start 0, no end limit); otherwise playback would index the slice with
// absolute file numbers and skip or miss frames.
func (f *FileSource) loadImageSequence() error {
	pattern := f.config.Pattern
	if pattern == "" && strings.Contains(f.config.Path, "%") {
		pattern = filepath.Base(f.config.Path)
	}
	if pattern == "" {
		return fmt.Errorf("image sequence requires a filename pattern (e.g. frame_%%04d.png)")
	}

	dir := filepath.Dir(f.config.Path)
	var frames []image.Image

	// Generate filenames based on pattern.
	for i := f.config.StartFrame; f.config.EndFrame < 0 || i <= f.config.EndFrame; i++ {
		filename := fmt.Sprintf(pattern, i)
		if !filepath.IsAbs(filename) {
			filename = filepath.Join(dir, filename)
		}

		// A missing file marks the end of the sequence.
		if _, err := os.Stat(filename); os.IsNotExist(err) {
			if len(frames) == 0 {
				return fmt.Errorf("no images found matching pattern: %s", pattern)
			}
			break
		}

		img, err := f.loadImage(filename)
		if err != nil {
			return fmt.Errorf("failed to load %s: %w", filename, err)
		}
		frames = append(frames, img)
	}

	if len(frames) == 0 {
		return fmt.Errorf("no frames loaded from sequence")
	}

	f.frameData = frames
	f.totalFrames = len(frames)
	f.width = frames[0].Bounds().Dx()
	f.height = frames[0].Bounds().Dy()

	// The requested window has already been applied while loading.
	f.config.StartFrame = 0
	f.config.EndFrame = -1
	f.currentFrame = 0
	return nil
}
// loadDirectory turns every decodable image directly inside the configured
// directory into one frame, in the order os.ReadDir lists the entries.
//
// Sub-directories and files whose extension isNotImage are skipped, and so
// are files that fail to decode. It is an error if nothing could be loaded.
// The source dimensions are taken from the first frame.
func (f *FileSource) loadDirectory() error {
	dir := f.config.Path
	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}

	var loaded []image.Image
	for _, e := range entries {
		name := e.Name()
		if e.IsDir() || !isImageFile(strings.ToLower(filepath.Ext(name))) {
			continue
		}
		// Files that cannot be decoded are skipped rather than aborting the load.
		if img, err := f.loadImage(filepath.Join(dir, name)); err == nil {
			loaded = append(loaded, img)
		}
	}

	if len(loaded) == 0 {
		return fmt.Errorf("no image files found in directory: %s", dir)
	}

	first := loaded[0].Bounds()
	f.frameData = loaded
	f.totalFrames = len(loaded)
	f.width = first.Dx()
	f.height = first.Dy()
	return nil
}
// loadVideo loads frames from a video file.
//
// Placeholder: no decoding happens yet. The method always returns an error
// and leaves the source without frames.
func (f *FileSource) loadVideo() error {
// TODO: Implement video decoding
// This would require a video decoding library like FFmpeg
// For now, return an error
return fmt.Errorf("video capture not implemented yet - use image sequences instead")
}
// loadImage opens path and decodes it into an image.
//
// The decoder is picked from the (case-insensitive) file extension: PNG, JPEG
// and GIF use their dedicated decoders, every other extension goes through
// image.Decode, which recognises the format from the file contents.
func (f *FileSource) loadImage(path string) (image.Image, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	ext := strings.ToLower(filepath.Ext(path))
	if ext == ".png" {
		return png.Decode(file)
	}
	if ext == ".jpg" || ext == ".jpeg" {
		return jpeg.Decode(file)
	}
	if ext == ".gif" {
		return gif.Decode(file)
	}

	// Unknown extension: let the image package sniff the format.
	img, _, err := image.Decode(file)
	return img, err
}
// isImageFile reports whether ext (lower-case, including the leading dot) is
// one of the image extensions this package accepts when scanning a directory.
func isImageFile(ext string) bool {
	for _, known := range [...]string{".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"} {
		if ext == known {
			return true
		}
	}
	return false
}

View file

@ -0,0 +1,313 @@
// Monitor capture for full screen or monitor region capture (cross-platform).
package backends
import (
"fmt"
"image"
"runtime"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// MonitorConfig holds configuration for monitor capture.
//
// NewMonitorSource fills it from a generic map using the same key names as
// the yaml tags.
type MonitorConfig struct {
// MonitorIndex specifies which monitor to capture (0-based).
// -1 captures the primary monitor.
MonitorIndex int `yaml:"monitor_index"`
// Region defines a specific area to capture.
// If nil, captures the entire monitor.
// When set, Size reports the region's dimensions instead of the monitor's.
Region *capture.Region `yaml:"region"`
// IncludeCursor captures the mouse cursor.
IncludeCursor bool `yaml:"include_cursor"`
// Method specifies the capture method.
// Options: "auto", "gdi", "dxgi", "x11", "wayland"
// "gdi"/"dxgi" are only accepted on Windows, "x11"/"wayland" only on Linux.
Method string `yaml:"method"`
}
// MonitorSource captures from a monitor or screen region.
type MonitorSource struct {
// config is the configuration this source was built from.
config MonitorConfig
// monitors is the list of known displays, filled by enumerateMonitors.
monitors []MonitorInfo
// activeIndex is not read by any code visible in this file.
activeIndex int
// width and height are what Size reports when no region is configured.
width int
height int
// impl is the platform-specific backend; nil until initImpl has run.
impl MonitorCaptureImpl
}
// MonitorInfo describes a display monitor.
type MonitorInfo struct {
// Index is the position of the monitor in the enumeration (0-based).
Index int
// Name is a human-readable label for the display.
Name string
// X and Y give the monitor's origin.
// NOTE(review): presumably in virtual-desktop coordinates — confirm once
// real enumeration is implemented.
X int
Y int
// Width and Height are the monitor dimensions.
// NOTE(review): presumably in pixels — confirm against the backends.
Width int
Height int
// Primary marks the monitor chosen when MonitorIndex is -1.
Primary bool
// ScaleFactor is the display scaling factor (1.0 in the placeholder data).
ScaleFactor float64
}
// MonitorCaptureImpl defines platform-specific capture implementation.
type MonitorCaptureImpl interface {
// Initialize prepares the backend; it is called once from initImpl.
Initialize() error
// CaptureMonitor grabs one frame of monitor. MonitorSource passes the
// configured region from Capture (possibly nil) and the caller's region
// from CaptureRegion.
CaptureMonitor(monitor MonitorInfo, region *capture.Region) (image.Image, error)
// Close releases whatever Initialize acquired.
Close() error
}
// NewMonitorSource builds a monitor capture source from a loosely typed
// configuration map and initialises the platform backend.
//
// Recognised keys and their defaults when absent or of an unexpected type:
// "monitor_index" (int, -1 = primary), "region" (map with int keys "x", "y",
// "width", "height"; unset fields stay 0), "include_cursor" (bool, true) and
// "method" (string, "auto").
func NewMonitorSource(configMap map[string]interface{}) (capture.Source, error) {
	config := MonitorConfig{
		MonitorIndex:  -1, // Primary monitor
		IncludeCursor: true,
		Method:        "auto",
	}

	if v, ok := configMap["monitor_index"].(int); ok {
		config.MonitorIndex = v
	}
	if raw, ok := configMap["region"].(map[string]interface{}); ok {
		// Missing or non-int entries leave the coordinate at zero.
		intOrZero := func(key string) int {
			n, _ := raw[key].(int)
			return n
		}
		config.Region = &capture.Region{
			X:      intOrZero("x"),
			Y:      intOrZero("y"),
			Width:  intOrZero("width"),
			Height: intOrZero("height"),
		}
	}
	if v, ok := configMap["include_cursor"].(bool); ok {
		config.IncludeCursor = v
	}
	if v, ok := configMap["method"].(string); ok {
		config.Method = v
	}

	source := &MonitorSource{config: config}

	// Initialize platform-specific implementation.
	if err := source.initImpl(); err != nil {
		return nil, fmt.Errorf("failed to initialize monitor capture: %w", err)
	}
	return source, nil
}
// Name labels the source by its configured monitor index, or as the primary
// monitor when the index is negative.
func (m *MonitorSource) Name() string {
	idx := m.config.MonitorIndex
	if idx < 0 {
		return "Primary Monitor"
	}
	return fmt.Sprintf("Monitor %d", idx)
}
// Capture takes one frame of the target monitor, restricted to the configured
// region if there is one. It fails when no backend has been initialised or no
// target monitor can be determined.
func (m *MonitorSource) Capture() (image.Image, error) {
	backend := m.impl
	if backend == nil {
		return nil, fmt.Errorf("monitor capture not initialized")
	}

	target, err := m.getTargetMonitor()
	if err != nil {
		return nil, err
	}
	return backend.CaptureMonitor(target, m.config.Region)
}
// CaptureRegion takes one frame of the target monitor restricted to r.
// The region configured on the source is not applied here; r is handed to
// the backend as given.
func (m *MonitorSource) CaptureRegion(r capture.Region) (image.Image, error) {
	backend := m.impl
	if backend == nil {
		return nil, fmt.Errorf("monitor capture not initialized")
	}

	target, err := m.getTargetMonitor()
	if err != nil {
		return nil, err
	}
	return backend.CaptureMonitor(target, &r)
}
// Size reports the dimensions of captured frames: the configured region's
// size when a region is set, otherwise the stored monitor size.
func (m *MonitorSource) Size() (width, height int) {
	if region := m.config.Region; region != nil {
		return region.Width, region.Height
	}
	return m.width, m.height
}
// Close shuts down the platform backend and returns its error. Without a
// backend there is nothing to release and nil is returned.
func (m *MonitorSource) Close() error {
	if m.impl == nil {
		return nil
	}
	return m.impl.Close()
}
// initImpl selects, initialises and installs the platform-specific capture
// backend, then discovers the monitors and records the target monitor's size.
//
// The backend is chosen from config.Method; explicit methods are only
// accepted on the operating system they belong to. The source is modified
// only when every step succeeded: a backend whose Initialize fails is never
// stored, and a backend that was initialised is closed again if monitor
// discovery or target selection fails.
//
// Without the enumeration step the monitor list would stay empty and every
// capture would fail with "no monitors detected".
func (m *MonitorSource) initImpl() error {
	var impl MonitorCaptureImpl
	var err error

	// Choose implementation based on platform and method.
	switch {
	case m.config.Method == "auto":
		impl, err = m.createAutoImpl()
	case m.config.Method == "gdi" && runtime.GOOS == "windows":
		impl, err = NewGDICapture(m.config)
	case m.config.Method == "dxgi" && runtime.GOOS == "windows":
		impl, err = NewDXGICapture(m.config)
	case m.config.Method == "x11" && runtime.GOOS == "linux":
		impl, err = NewX11MonitorCapture(m.config)
	case m.config.Method == "wayland" && runtime.GOOS == "linux":
		impl, err = NewWaylandMonitorCapture(m.config)
	default:
		return fmt.Errorf("unsupported capture method %q for platform %s", m.config.Method, runtime.GOOS)
	}
	if err != nil {
		return err
	}

	if err := impl.Initialize(); err != nil {
		return err
	}

	if err := m.enumerateMonitors(); err != nil {
		_ = impl.Close() // best effort; the enumeration error is the one to report
		return fmt.Errorf("enumerating monitors: %w", err)
	}
	target, err := m.getTargetMonitor()
	if err != nil {
		_ = impl.Close() // best effort; the selection error is the one to report
		return err
	}

	m.impl = impl
	m.activeIndex = target.Index
	m.width, m.height = target.Width, target.Height
	return nil
}
// createAutoImpl picks a backend for the running operating system.
//
// Windows prefers DXGI and falls back to GDI, Linux prefers Wayland and falls
// back to X11, macOS uses its single backend; other systems are unsupported.
// The fallback is taken only when the preferred constructor itself returns an
// error; a failure in the backend's later Initialize call does not trigger it.
func (m *MonitorSource) createAutoImpl() (MonitorCaptureImpl, error) {
	goos := runtime.GOOS

	if goos == "windows" {
		impl, err := NewDXGICapture(m.config)
		if err != nil {
			return NewGDICapture(m.config)
		}
		return impl, nil
	}
	if goos == "linux" {
		impl, err := NewWaylandMonitorCapture(m.config)
		if err != nil {
			return NewX11MonitorCapture(m.config)
		}
		return impl, nil
	}
	if goos == "darwin" {
		return NewMacOSCapture(m.config)
	}
	return nil, fmt.Errorf("unsupported platform: %s", goos)
}
// getTargetMonitor returns the monitor to capture based on configuration.
//
// An index of -1 selects the monitor flagged as primary, or the first monitor
// when none is flagged. Any other index must address an existing entry of the
// monitor list; indices outside the list, including negative ones other than
// -1, yield an error instead of an out-of-range panic. An empty monitor list
// is always an error.
func (m *MonitorSource) getTargetMonitor() (MonitorInfo, error) {
	if len(m.monitors) == 0 {
		return MonitorInfo{}, fmt.Errorf("no monitors detected")
	}

	idx := m.config.MonitorIndex
	if idx == -1 {
		// Find primary monitor.
		for _, monitor := range m.monitors {
			if monitor.Primary {
				return monitor, nil
			}
		}
		// If no primary found, use first monitor.
		return m.monitors[0], nil
	}

	if idx < 0 || idx >= len(m.monitors) {
		return MonitorInfo{}, fmt.Errorf("monitor index %d out of range (have %d monitors)", idx, len(m.monitors))
	}
	return m.monitors[idx], nil
}
// enumerateMonitors fills the monitor list.
//
// Placeholder: instead of querying the platform it installs one hard-coded
// 1920x1080 primary display at the origin with scale factor 1.0, and it
// always returns nil.
func (m *MonitorSource) enumerateMonitors() error {
	// TODO: Implement monitor enumeration for each platform
	// This would use platform-specific APIs to discover monitors
	placeholder := MonitorInfo{
		Name:        "Primary Display",
		Width:       1920,
		Height:      1080,
		Primary:     true,
		ScaleFactor: 1.0,
	}
	m.monitors = []MonitorInfo{placeholder}
	return nil
}
// Platform-specific capture implementations (stubs for now)
//
// Every constructor below only wraps the given config in its stub type and
// never returns an error; the "not implemented" failure surfaces later, from
// the stub's Initialize method.

// NewGDICapture creates a GDI-based capture implementation for Windows.
func NewGDICapture(config MonitorConfig) (MonitorCaptureImpl, error) {
// TODO: Implement GDI capture using GetDC/BitBlt
return &GDICapture{config: config}, nil
}
// NewDXGICapture creates a DXGI-based capture implementation for Windows.
func NewDXGICapture(config MonitorConfig) (MonitorCaptureImpl, error) {
// TODO: Implement DXGI Desktop Duplication API capture
return &DXGICapture{config: config}, nil
}
// NewX11MonitorCapture creates an X11-based capture implementation for Linux.
func NewX11MonitorCapture(config MonitorConfig) (MonitorCaptureImpl, error) {
// TODO: Implement X11 root window capture using XGetImage
return &X11MonitorCapture{config: config}, nil
}
// NewWaylandMonitorCapture creates a Wayland-based capture implementation for Linux.
func NewWaylandMonitorCapture(config MonitorConfig) (MonitorCaptureImpl, error) {
// TODO: Implement Wayland capture using PipeWire/xdg-desktop-portal
return &WaylandMonitorCapture{config: config}, nil
}
// NewMacOSCapture creates a macOS capture implementation.
func NewMacOSCapture(config MonitorConfig) (MonitorCaptureImpl, error) {
// TODO: Implement macOS capture using CGDisplayCreateImage
return &MacOSCapture{config: config}, nil
}
// Stub implementations
//
// Each type below implements MonitorCaptureImpl as a placeholder: Initialize
// and CaptureMonitor always return a "not implemented" error and Close does
// nothing. The stored config is not read yet.

// GDICapture is the placeholder for Windows GDI capture.
type GDICapture struct{ config MonitorConfig }
func (g *GDICapture) Initialize() error { return fmt.Errorf("GDI capture not implemented") }
func (g *GDICapture) CaptureMonitor(monitor MonitorInfo, region *capture.Region) (image.Image, error) { return nil, fmt.Errorf("GDI capture not implemented") }
func (g *GDICapture) Close() error { return nil }
// DXGICapture is the placeholder for Windows DXGI capture.
type DXGICapture struct{ config MonitorConfig }
func (d *DXGICapture) Initialize() error { return fmt.Errorf("DXGI capture not implemented") }
func (d *DXGICapture) CaptureMonitor(monitor MonitorInfo, region *capture.Region) (image.Image, error) { return nil, fmt.Errorf("DXGI capture not implemented") }
func (d *DXGICapture) Close() error { return nil }
// X11MonitorCapture is the placeholder for X11 monitor capture on Linux.
type X11MonitorCapture struct{ config MonitorConfig }
func (x *X11MonitorCapture) Initialize() error { return fmt.Errorf("X11 monitor capture not implemented") }
func (x *X11MonitorCapture) CaptureMonitor(monitor MonitorInfo, region *capture.Region) (image.Image, error) { return nil, fmt.Errorf("X11 monitor capture not implemented") }
func (x *X11MonitorCapture) Close() error { return nil }
// WaylandMonitorCapture is the placeholder for Wayland monitor capture on Linux.
type WaylandMonitorCapture struct{ config MonitorConfig }
func (w *WaylandMonitorCapture) Initialize() error { return fmt.Errorf("Wayland monitor capture not implemented") }
func (w *WaylandMonitorCapture) CaptureMonitor(monitor MonitorInfo, region *capture.Region) (image.Image, error) { return nil, fmt.Errorf("Wayland monitor capture not implemented") }
func (w *WaylandMonitorCapture) Close() error { return nil }
// MacOSCapture is the placeholder for macOS capture.
type MacOSCapture struct{ config MonitorConfig }
func (m *MacOSCapture) Initialize() error { return fmt.Errorf("macOS capture not implemented") }
func (m *MacOSCapture) CaptureMonitor(monitor MonitorInfo, region *capture.Region) (image.Image, error) { return nil, fmt.Errorf("macOS capture not implemented") }
func (m *MacOSCapture) Close() error { return nil }

View file

@ -0,0 +1,142 @@
// Package backends provides modular screen capture implementations.
//
// Supports various capture sources:
// - Window capture (Win32, X11)
// - Wayland desktop capture
// - VNC client capture
// - SPICE capture for VMs
// - Monitor/screen region capture
// - File-based capture (for testing)
package backends
import (
"fmt"
"strings"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// BackendType identifies a capture backend implementation.
// Its string value is the name accepted by ParseBackendType.
type BackendType string
// Known backend identifiers, one per constructor registered by GetDefault.
const (
BackendWindowWin32 BackendType = "window_win32"
BackendWindowX11 BackendType = "window_x11"
BackendWayland BackendType = "wayland"
BackendVNC BackendType = "vnc"
BackendSpice BackendType = "spice"
BackendMonitor BackendType = "monitor"
BackendFile BackendType = "file"
)
// BackendInfo describes a capture backend.
// Values are produced by Registry.ListAvailable.
type BackendInfo struct {
// Type is the identifier the backend was registered under.
Type BackendType
// Name is a short human-readable label; empty for backend types that
// ListAvailable has no built-in description for.
Name string
// Description is a one-line summary of how the backend captures.
Description string
Available bool // true if backend can be used on this platform
}
// Registry manages available capture backends.
type Registry struct {
// backends maps a backend type to the constructor that builds a capture
// source from a generic configuration map.
backends map[BackendType]func(map[string]interface{}) (capture.Source, error)
}
// NewRegistry returns an empty registry that is ready to accept backend
// constructors.
func NewRegistry() *Registry {
	reg := new(Registry)
	reg.backends = map[BackendType]func(map[string]interface{}) (capture.Source, error){}
	return reg
}
// Register stores constructor as the factory for backendType, replacing any
// constructor previously registered under the same type.
//
// The backing map is created on first use, so Register also works on a
// Registry that was not obtained from NewRegistry (for example a zero value)
// instead of panicking on a nil map.
func (r *Registry) Register(backendType BackendType, constructor func(map[string]interface{}) (capture.Source, error)) {
	if r.backends == nil {
		r.backends = make(map[BackendType]func(map[string]interface{}) (capture.Source, error))
	}
	r.backends[backendType] = constructor
}
// Create builds a capture source by passing config to the constructor
// registered for backendType. An unregistered type is an error; otherwise the
// constructor's result is returned as is.
func (r *Registry) Create(backendType BackendType, config map[string]interface{}) (capture.Source, error) {
	if constructor, exists := r.backends[backendType]; exists {
		return constructor(config)
	}
	return nil, fmt.Errorf("backend %q not registered", backendType)
}
// ListAvailable returns information about available backends.
//
// One entry is produced per registered backend, ordered by backend type so
// that repeated calls give the same listing (map iteration order alone is
// not stable). Name and Description are filled in for the built-in backend
// types and left empty for any other registered type. The result is nil when
// nothing is registered.
//
// Available is currently set to true for every registered backend; no
// platform check is performed here.
func (r *Registry) ListAvailable() []BackendInfo {
	var infos []BackendInfo

	for backendType := range r.backends {
		info := BackendInfo{
			Type:      backendType,
			Available: true,
		}

		switch backendType {
		case BackendWindowWin32:
			info.Name = "Windows Window Capture"
			info.Description = "Capture specific window using BitBlt/DXGI APIs"
		case BackendWindowX11:
			info.Name = "X11 Window Capture"
			info.Description = "Capture specific window using XGetImage"
		case BackendWayland:
			info.Name = "Wayland Screen Capture"
			info.Description = "Screen capture via PipeWire/xdg-desktop-portal"
		case BackendVNC:
			info.Name = "VNC Client Capture"
			info.Description = "Capture frames from VNC server"
		case BackendSpice:
			info.Name = "SPICE VM Capture"
			info.Description = "Capture from QEMU/KVM SPICE display"
		case BackendMonitor:
			info.Name = "Monitor Capture"
			info.Description = "Full screen or monitor region capture"
		case BackendFile:
			info.Name = "File Input"
			info.Description = "Read from image files or video streams"
		}

		// Ordered insert: shift larger entries one slot right, then place
		// the new entry. The list is tiny, so this stays cheap.
		pos := len(infos)
		infos = append(infos, info)
		for pos > 0 && infos[pos-1].Type > info.Type {
			infos[pos] = infos[pos-1]
			pos--
		}
		infos[pos] = info
	}
	return infos
}
// ParseBackendType converts a backend name into its BackendType.
// Matching ignores letter case; an unrecognised name yields an empty type and
// an error that quotes the input as given.
func ParseBackendType(s string) (BackendType, error) {
	known := map[string]BackendType{
		"window_win32": BackendWindowWin32,
		"window_x11":   BackendWindowX11,
		"wayland":      BackendWayland,
		"vnc":          BackendVNC,
		"spice":        BackendSpice,
		"monitor":      BackendMonitor,
		"file":         BackendFile,
	}
	if backend, ok := known[strings.ToLower(s)]; ok {
		return backend, nil
	}
	return "", fmt.Errorf("unknown backend type: %q", s)
}
// GetDefault builds a fresh registry pre-loaded with the constructor of every
// backend in this package. All backends are registered regardless of the
// operating system the program runs on. Each call returns a new Registry.
func GetDefault() *Registry {
	builtins := []struct {
		kind  BackendType
		build func(map[string]interface{}) (capture.Source, error)
	}{
		{BackendWindowWin32, NewWin32Source},
		{BackendWindowX11, NewX11Source},
		{BackendWayland, NewWaylandSource},
		{BackendVNC, NewVNCSource},
		{BackendSpice, NewSpiceSource},
		{BackendMonitor, NewMonitorSource},
		{BackendFile, NewFileSource},
	}

	reg := NewRegistry()
	for _, b := range builtins {
		reg.Register(b.kind, b.build)
	}
	return reg
}

View file

@ -0,0 +1,303 @@
// SPICE protocol capture for QEMU/KVM virtual machines.
package backends
import (
"fmt"
"image"
"net"
"time"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// SpiceConfig holds configuration for SPICE capture.
//
// NewSpiceSource fills it from a generic map using the same key names as the
// yaml tags.
type SpiceConfig struct {
// Host is the SPICE server hostname or IP address.
Host string `yaml:"host"`
// Port is the SPICE server port (default 5900).
Port int `yaml:"port"`
// Password for SPICE authentication (optional).
Password string `yaml:"password"`
// TLSPort is the secure SPICE port (if using TLS).
// NewSpiceSource defaults it to 5901.
TLSPort int `yaml:"tls_port"`
// UseTLS enables encrypted connection.
// In the visible code it only selects TLSPort instead of Port when dialing.
UseTLS bool `yaml:"use_tls"`
// CACertFile path to CA certificate for TLS verification.
CACertFile string `yaml:"ca_cert_file"`
// Channels specifies which SPICE channels to use (display, inputs, etc.).
Channels []string `yaml:"channels"`
// ConnectTimeoutMs is the timeout for the initial connection, in
// milliseconds (NewSpiceSource defaults it to 10000).
ConnectTimeoutMs int `yaml:"connect_timeout_ms"`
// ImageCompression sets the preferred image compression type.
ImageCompression string `yaml:"image_compression"` // "auto", "quic", "glz", "lz"
}
// SpiceSource captures frames from a QEMU/KVM VM via SPICE protocol.
type SpiceSource struct {
// config is the configuration this source was built from.
config SpiceConfig
// conn is the TCP connection opened by connect; nil before that.
conn net.Conn
// sessionID is not read or written by any code visible in this file.
sessionID uint32
// channels holds the open SPICE channels; Close closes their connections.
channels map[string]*SpiceChannel
// displays is created empty by NewSpiceSource; nothing visible fills it yet.
displays []*SpiceDisplay
// width and height are what Size reports; nothing visible sets them yet.
width int
height int
// connected tells Capture whether connect still has to be called.
connected bool
}
// SpiceChannel represents a SPICE protocol channel.
type SpiceChannel struct {
// Type names the channel kind.
// NOTE(review): presumably values such as "display" or "inputs", matching
// SpiceConfig.Channels — confirm once channels are actually created.
Type string
// ID is the channel identifier.
ID int
// Conn is the channel's own connection; SpiceSource.Close closes it when non-nil.
Conn net.Conn
// Sequence is a per-channel counter; nothing visible in this file updates it yet.
Sequence uint64
}
// SpiceDisplay represents a display surface in SPICE.
type SpiceDisplay struct {
// ID identifies the surface.
ID uint32
// Width and Height are the surface dimensions.
Width int
Height int
// Format describes how pixels are laid out in Data.
Format SpicePixelFormat
// Data is the surface's pixel buffer.
// NOTE(review): layout (stride, row order) is not established by the
// visible code — define it when the framebuffer update is implemented.
Data []byte
}
// SpicePixelFormat describes pixel format used by SPICE.
type SpicePixelFormat struct {
// BitsPerPixel and BytesPerPixel give the size of one pixel.
BitsPerPixel int
BytesPerPixel int
// RedShift, GreenShift and BlueShift are the bit positions of each colour
// component within a pixel value.
RedShift int
GreenShift int
BlueShift int
// RedMask, GreenMask and BlueMask select each colour component.
// NOTE(review): whether masks apply before or after shifting is not shown
// by the visible code — document it alongside the first user.
RedMask uint32
GreenMask uint32
BlueMask uint32
}
// NewSpiceSource creates a SPICE capture source.
//
// configMap is a loosely typed configuration map. Recognised keys:
//
//	host               string   required, server name or address
//	port               int      default 5900
//	password           string   optional
//	tls_port           int      default 5901
//	use_tls            bool     default false
//	ca_cert_file       string   optional
//	channels           list     channel names; default "display", "inputs"
//	connect_timeout_ms int      default 10000
//	image_compression  string   default "auto"
//
// Scalar keys with an unexpected type are ignored and keep their default.
// "channels" is accepted both as []string and as []interface{} of strings:
// generic decoders hand lists over as []interface{}, which a plain []string
// assertion would silently discard. A list containing a non-string entry is
// rejected.
//
// No connection is opened here; that happens on the first Capture call.
func NewSpiceSource(configMap map[string]interface{}) (capture.Source, error) {
	host, ok := configMap["host"].(string)
	if !ok || host == "" {
		return nil, fmt.Errorf("spice host is required")
	}

	config := SpiceConfig{
		Host:             host,
		Port:             5900, // Default SPICE port
		TLSPort:          5901, // Default TLS port
		Channels:         []string{"display", "inputs"},
		ConnectTimeoutMs: 10000, // 10 seconds
		ImageCompression: "auto",
	}

	if port, ok := configMap["port"].(int); ok {
		config.Port = port
	}
	if password, ok := configMap["password"].(string); ok {
		config.Password = password
	}
	if tlsPort, ok := configMap["tls_port"].(int); ok {
		config.TLSPort = tlsPort
	}
	if useTLS, ok := configMap["use_tls"].(bool); ok {
		config.UseTLS = useTLS
	}
	if caCert, ok := configMap["ca_cert_file"].(string); ok {
		config.CACertFile = caCert
	}

	switch channels := configMap["channels"].(type) {
	case []string:
		config.Channels = channels
	case []interface{}:
		names := make([]string, 0, len(channels))
		for _, entry := range channels {
			name, ok := entry.(string)
			if !ok {
				return nil, fmt.Errorf("spice channels must be strings, got %T", entry)
			}
			names = append(names, name)
		}
		config.Channels = names
	}

	if connectTimeout, ok := configMap["connect_timeout_ms"].(int); ok {
		config.ConnectTimeoutMs = connectTimeout
	}
	if compression, ok := configMap["image_compression"].(string); ok {
		config.ImageCompression = compression
	}

	return &SpiceSource{
		config:   config,
		channels: make(map[string]*SpiceChannel),
		displays: make([]*SpiceDisplay, 0),
	}, nil
}
// Name labels the source with the configured server host and (non-TLS) port.
func (s *SpiceSource) Name() string {
	host, port := s.config.Host, s.config.Port
	return fmt.Sprintf("SPICE: %s:%d", host, port)
}
// Capture is meant to return the current SPICE display contents.
//
// It first connects if the source is not marked as connected, wrapping any
// connection error. Frame retrieval itself is not implemented, so even after
// a successful connection the call returns a "not implemented" error.
func (s *SpiceSource) Capture() (image.Image, error) {
	needsConnect := !s.connected
	if needsConnect {
		err := s.connect()
		if err != nil {
			return nil, fmt.Errorf("failed to connect to SPICE server: %w", err)
		}
	}

	// TODO: Implement SPICE frame capture
	// 1. Check for display updates on the display channel
	// 2. Process SPICE display commands (draw operations)
	// 3. Update local framebuffer with received display data
	// 4. Convert framebuffer to Go image.Image
	return nil, fmt.Errorf("SPICE capture not implemented yet")
}
// CaptureRegion grabs a sub-region of the SPICE display.
//
// It captures a full frame and crops it to r. A region that only partly
// overlaps the frame is clipped to the frame; a region that does not overlap
// it at all is an error, matching the file-based source. An error is also
// returned when the frame's concrete image type offers no SubImage method,
// instead of panicking on the type assertion.
func (s *SpiceSource) CaptureRegion(r capture.Region) (image.Image, error) {
	// TODO: Implement region capture
	// SPICE may support partial updates, otherwise crop full frame
	fullFrame, err := s.Capture()
	if err != nil {
		return nil, err
	}

	cropper, ok := fullFrame.(interface {
		SubImage(r image.Rectangle) image.Image
	})
	if !ok {
		return nil, fmt.Errorf("image type %T does not support cropping", fullFrame)
	}

	// Crop the image to the specified region, clipped to the frame.
	bounds := image.Rect(r.X, r.Y, r.X+r.Width, r.Y+r.Height)
	imgBounds := fullFrame.Bounds()
	if !bounds.In(imgBounds) {
		bounds = bounds.Intersect(imgBounds)
		if bounds.Empty() {
			return nil, fmt.Errorf("region is outside image bounds")
		}
	}
	return cropper.SubImage(bounds), nil
}
// Size reports the display width and height currently stored on the source.
func (s *SpiceSource) Size() (width, height int) {
	width, height = s.width, s.height
	return width, height
}
// Close disconnects from the SPICE server.
//
// Every channel connection and the main connection are closed, and the source
// is marked as disconnected. Closed connections are forgotten, so calling
// Close again is a harmless no-op rather than a second close of the same
// socket. All connections are closed even if one of them fails; the first
// error encountered is returned.
func (s *SpiceSource) Close() error {
	s.connected = false

	var firstErr error
	keep := func(err error) {
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}

	// Close all channels.
	for name, channel := range s.channels {
		if channel != nil && channel.Conn != nil {
			keep(channel.Conn.Close())
		}
		delete(s.channels, name)
	}

	if s.conn != nil {
		keep(s.conn.Close())
		s.conn = nil
	}
	return firstErr
}
// connect establishes connection to SPICE server and performs handshake.
//
// It dials host:Port (or host:TLSPort when UseTLS is set) over plain TCP with
// the configured connect timeout and then runs the protocol handshake. The
// source is marked connected only when the handshake succeeded. If the
// handshake fails, the socket is closed and forgotten, so a later Capture
// retries the whole connection instead of continuing on a half-open session.
//
// NOTE(review): UseTLS currently only selects the port; no TLS session is
// negotiated on the connection.
func (s *SpiceSource) connect() error {
	// TODO: Implement SPICE connection and protocol handshake
	// 1. Connect to main channel (host:port)
	// 2. Exchange SPICE link messages
	// 3. Authenticate (if password required)
	// 4. Establish display and input channels
	// 5. Get display configuration
	port := s.config.Port
	if s.config.UseTLS {
		port = s.config.TLSPort
	}

	// JoinHostPort brackets IPv6 literals, which a plain "%s:%d" would not.
	addr := net.JoinHostPort(s.config.Host, fmt.Sprint(port))
	timeout := time.Duration(s.config.ConnectTimeoutMs) * time.Millisecond

	conn, err := net.DialTimeout("tcp", addr, timeout)
	if err != nil {
		return fmt.Errorf("failed to connect to %s: %w", addr, err)
	}

	// The handshake talks over the main connection, so publish it first.
	s.conn = conn
	if err := s.performHandshake(); err != nil {
		s.conn = nil
		_ = conn.Close() // best effort; the handshake error is the one to report
		return err
	}

	s.connected = true
	return nil
}
// performHandshake handles SPICE protocol negotiation.
//
// Placeholder: it exchanges nothing and always returns a "not implemented"
// error. The steps below outline the intended implementation.
func (s *SpiceSource) performHandshake() error {
// TODO: Implement SPICE handshake
// 1. Send SpiceLinkMess with supported channels and capabilities
// 2. Receive SpiceLinkReply
// 3. Authenticate with password (if required)
// 4. Establish channels based on configuration
return fmt.Errorf("SPICE handshake not implemented")
}
// connectChannel establishes a specific SPICE channel.
//
// Placeholder: both arguments are ignored and the call always returns a nil
// channel together with a "not implemented" error.
func (s *SpiceSource) connectChannel(channelType string, channelID int) (*SpiceChannel, error) {
// TODO: Connect to specific SPICE channel
// 1. Open new connection for channel
// 2. Send channel link messages
// 3. Complete channel-specific handshake
// 4. Setup message processing for channel type
return nil, fmt.Errorf("SPICE channel connection not implemented")
}
// processDisplayMessages handles messages on the display channel.
//
// Placeholder: nothing is read or processed; the call always returns a
// "not implemented" error. The list below names the message kinds the real
// implementation is expected to handle.
func (s *SpiceSource) processDisplayMessages() error {
// TODO: Process SPICE display messages
// Handle messages like:
// - SPICE_MSG_DISPLAY_MODE (display mode change)
// - SPICE_MSG_DISPLAY_MARK (display updates)
// - SPICE_MSG_DISPLAY_RESET (display reset)
// - SPICE_MSG_DISPLAY_COPY_BITS (copy operation)
// - SPICE_MSG_DISPLAY_INVAL_ALL_PIXMAPS (invalidate caches)
// - Various draw operations (draw_alpha, draw_copy, etc.)
return fmt.Errorf("SPICE display message processing not implemented")
}
// updateDisplay processes a display update command.
//
// Placeholder: all arguments are ignored, no display surface is touched and
// the call always returns a "not implemented" error.
func (s *SpiceSource) updateDisplay(displayID uint32, x, y, width, height int, data []byte) error {
// TODO: Update local framebuffer with received display data
// 1. Find or create display surface
// 2. Decompress image data (if compressed)
// 3. Update framebuffer region
return fmt.Errorf("SPICE display update not implemented")
}
// decompressImage routes data to the decompressor named by compression:
// "quic", "glz" or "lz". Any other value, including "auto", is treated as
// uncompressed and data is returned unchanged with a nil error.
func (s *SpiceSource) decompressImage(data []byte, compression string) ([]byte, error) {
	// TODO: Implement SPICE image decompression
	// Support formats: QUIC, GLZ, LZ, JPEG
	if compression == "quic" {
		return s.decompressQUIC(data)
	}
	if compression == "glz" {
		return s.decompressGLZ(data)
	}
	if compression == "lz" {
		return s.decompressLZ(data)
	}
	return data, nil // Uncompressed
}
// decompressQUIC decodes QUIC-compressed image data.
//
// Stub: always returns nil data and a "not implemented" error.
func (s *SpiceSource) decompressQUIC(data []byte) ([]byte, error) {
	// TODO: Implement the QUIC decoder.
	return nil, fmt.Errorf("QUIC decompression not implemented")
}
// decompressGLZ decodes GLZ-compressed image data.
//
// Stub: always returns nil data and a "not implemented" error.
func (s *SpiceSource) decompressGLZ(data []byte) ([]byte, error) {
	// TODO: Implement the GLZ decoder.
	return nil, fmt.Errorf("GLZ decompression not implemented")
}
// decompressLZ decodes LZ-compressed image data.
//
// Stub: always returns nil data and a "not implemented" error.
func (s *SpiceSource) decompressLZ(data []byte) ([]byte, error) {
	// TODO: Implement the LZ decoder.
	return nil, fmt.Errorf("LZ decompression not implemented")
}

View file

@ -0,0 +1,221 @@
// VNC client capture for remote desktop access.
package backends
import (
"fmt"
"image"
"net"
"time"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// VNCConfig holds configuration for VNC client capture.
//
// NewVNCSource fills it from a generic configuration map and applies the
// defaults noted on each field.
type VNCConfig struct {
	// Host is the VNC server hostname or IP address. It is required.
	Host string `yaml:"host"`
	// Port is the VNC server port (default 5900).
	Port int `yaml:"port"`
	// Password for VNC authentication.
	Password string `yaml:"password"`
	// Shared allows multiple VNC clients to connect simultaneously
	// (default true).
	Shared bool `yaml:"shared"`
	// Encodings specifies preferred pixel encodings (e.g., "raw", "copyrect", "hextile").
	Encodings []string `yaml:"encodings"`
	// ConnectTimeoutMs is the timeout for the initial connection, in
	// milliseconds (default 10000).
	ConnectTimeoutMs int `yaml:"connect_timeout_ms"`
	// ReadTimeoutMs is the timeout for frame updates (default 5000).
	// NOTE(review): presumably milliseconds, per the name; no visible code reads it yet.
	ReadTimeoutMs int `yaml:"read_timeout_ms"`
}
// VNCSource captures frames from a VNC server.
type VNCSource struct {
	config VNCConfig // settings supplied to NewVNCSource
	conn   net.Conn  // TCP connection opened by connect; nil until then
	// width and height are what Size reports. Nothing visible sets them yet,
	// so they stay zero until the handshake is implemented.
	width       int
	height      int
	pixelFormat VNCPixelFormat // NOTE(review): presumably filled from ServerInit; not yet used
	connected   bool           // set by connect, cleared by Close
}
// VNCPixelFormat describes the pixel format used by the VNC server.
//
// NOTE(review): the fields appear to mirror the RFB PIXEL_FORMAT structure;
// confirm once the handshake code that fills them exists.
type VNCPixelFormat struct {
	BitsPerPixel uint8
	Depth        uint8
	BigEndian    bool
	TrueColor    bool
	// Maximum value of each colour channel.
	RedMax   uint16
	GreenMax uint16
	BlueMax  uint16
	// Bit shift of each colour channel within a pixel value.
	RedShift   uint8
	GreenShift uint8
	BlueShift  uint8
}
// NewVNCSource creates a VNC client capture source from a generic
// configuration map.
//
// Recognized keys: "host" (string, required and non-empty), "port" (int,
// default 5900), "password" (string), "shared" (bool, default true),
// "encodings" (list of strings, default raw/copyrect/hextile/rre),
// "connect_timeout_ms" (int, default 10000) and "read_timeout_ms" (int,
// default 5000). A key with an unexpected type keeps its default.
//
// It returns an error when the host is missing or empty, or when the
// encodings list contains a non-string entry. No connection is opened here;
// that happens lazily on the first Capture.
func NewVNCSource(configMap map[string]interface{}) (capture.Source, error) {
	host, _ := configMap["host"].(string)
	if host == "" {
		return nil, fmt.Errorf("vnc host is required")
	}

	config := VNCConfig{
		Host:             host,
		Port:             5900, // Default VNC port
		Shared:           true, // Default to shared access
		Encodings:        []string{"raw", "copyrect", "hextile", "rre"},
		ConnectTimeoutMs: 10000, // 10 seconds
		ReadTimeoutMs:    5000,  // 5 seconds
	}

	if port, ok := configMap["port"].(int); ok {
		config.Port = port
	}
	if password, ok := configMap["password"].(string); ok {
		config.Password = password
	}
	if shared, ok := configMap["shared"].(bool); ok {
		config.Shared = shared
	}

	// Maps decoded from YAML/JSON carry lists as []interface{}, never as
	// []string, so both shapes must be accepted or the setting is lost.
	switch encodings := configMap["encodings"].(type) {
	case []string:
		config.Encodings = encodings
	case []interface{}:
		names := make([]string, 0, len(encodings))
		for _, entry := range encodings {
			name, ok := entry.(string)
			if !ok {
				return nil, fmt.Errorf("vnc encodings must be strings, got %T", entry)
			}
			names = append(names, name)
		}
		config.Encodings = names
	}

	if connectTimeout, ok := configMap["connect_timeout_ms"].(int); ok {
		config.ConnectTimeoutMs = connectTimeout
	}
	if readTimeout, ok := configMap["read_timeout_ms"].(int); ok {
		config.ReadTimeoutMs = readTimeout
	}

	return &VNCSource{
		config: config,
	}, nil
}
// Name identifies this capture source as "VNC: host:port".
func (v *VNCSource) Name() string {
	cfg := v.config
	return fmt.Sprintf("VNC: %s:%d", cfg.Host, cfg.Port)
}
// Capture grabs a single frame from the VNC server, connecting first when no
// connection has been established yet.
//
// Frame retrieval is a stub: once connected, it always returns a
// "not implemented" error.
func (v *VNCSource) Capture() (image.Image, error) {
	// Connect lazily on first use.
	if needsConnect := !v.connected; needsConnect {
		err := v.connect()
		if err != nil {
			return nil, fmt.Errorf("failed to connect to VNC server: %w", err)
		}
	}

	// TODO: Implement VNC frame capture:
	//   - send a FramebufferUpdateRequest message
	//   - read the FramebufferUpdate response
	//   - process rectangles in their encodings (Raw, RRE, CoRRE, Hextile, etc.)
	//   - update the local framebuffer
	//   - convert the framebuffer to a Go image.Image
	return nil, fmt.Errorf("VNC capture not implemented yet")
}
// CaptureRegion grabs the sub-region r of the VNC framebuffer.
//
// Stub: always returns a nil image and a "not implemented" error.
func (v *VNCSource) CaptureRegion(r capture.Region) (image.Image, error) {
	// TODO: Implement region capture by sending a FramebufferUpdateRequest
	// limited to the requested region.
	return nil, fmt.Errorf("VNC region capture not implemented yet")
}
// Size reports the stored VNC framebuffer width and height.
func (v *VNCSource) Size() (width, height int) {
	width = v.width
	height = v.height
	return width, height
}
// Close disconnects from the VNC server.
//
// It is safe to call more than once: the connection is dropped on the first
// call, so later calls are no-ops that return nil. The returned error is the
// one reported by closing the underlying connection.
func (v *VNCSource) Close() error {
	v.connected = false
	if v.conn == nil {
		return nil
	}
	conn := v.conn
	// Forget the connection before closing so a repeated Close does not try
	// to close an already-closed socket.
	v.conn = nil
	return conn.Close()
}
// connect dials the configured VNC server and performs the protocol
// handshake.
//
// The source is marked connected only when the handshake succeeds. On
// handshake failure the socket is closed and discarded, so the next Capture
// starts from a clean state instead of reusing a half-initialized session.
// Dial errors are wrapped with the target address; handshake errors are
// returned as-is.
func (v *VNCSource) connect() error {
	// TODO: Implement VNC connection and handshake
	// 1. Connect to host:port
	// 2. Read protocol version
	// 3. Perform authentication (if required)
	// 4. Send ClientInit message
	// 5. Read ServerInit (framebuffer size, pixel format, etc.)
	// 6. Set pixel format and encodings
	addr := fmt.Sprintf("%s:%d", v.config.Host, v.config.Port)
	timeout := time.Duration(v.config.ConnectTimeoutMs) * time.Millisecond
	conn, err := net.DialTimeout("tcp", addr, timeout)
	if err != nil {
		return fmt.Errorf("failed to connect to %s: %w", addr, err)
	}

	// The handshake talks over v.conn, so publish it first.
	v.conn = conn
	if err := v.performHandshake(); err != nil {
		v.conn = nil
		// Best effort: the handshake error is the one worth reporting.
		_ = conn.Close()
		return err
	}

	v.connected = true
	return nil
}
// performHandshake runs the VNC protocol negotiation.
//
// Stub: always returns a "not implemented" error.
func (v *VNCSource) performHandshake() error {
	// TODO: Implement the VNC handshake:
	//   - protocol version negotiation
	//   - security type negotiation
	//   - authentication (VNC, None, etc.)
	//   - ClientInit/ServerInit exchange
	return fmt.Errorf("VNC handshake not implemented")
}
// readFramebufferUpdate reads one framebuffer update and applies it.
//
// Stub: always returns a "not implemented" error.
func (v *VNCSource) readFramebufferUpdate() error {
	// TODO: Implement framebuffer update processing:
	//   - read the FramebufferUpdate message header
	//   - process each rectangle according to its encoding type
	//   - update the local framebuffer bitmap
	return fmt.Errorf("VNC framebuffer update not implemented")
}
// requestFramebufferUpdate asks the server for screen updates covering the
// given rectangle.
//
// Stub: always returns a "not implemented" error.
func (v *VNCSource) requestFramebufferUpdate(x, y, width, height int, incremental bool) error {
	// TODO: Send a FramebufferUpdateRequest message with this layout:
	//   Type:                3 (FramebufferUpdateRequest)
	//   Incremental:         0/1
	//   X, Y, Width, Height: region to update
	return fmt.Errorf("VNC framebuffer request not implemented")
}
// decodeRawEncoding handles a rectangle sent in Raw encoding (uncompressed
// pixel data).
//
// Stub: always returns a "not implemented" error.
func (v *VNCSource) decodeRawEncoding(x, y, width, height int) error {
	// TODO: Read the raw pixel data and write it into the framebuffer.
	return fmt.Errorf("VNC Raw encoding not implemented")
}
// decodeRREEncoding handles a rectangle sent in RRE (Rise-and-Run-length
// Encoding).
//
// Stub: always returns a "not implemented" error.
func (v *VNCSource) decodeRREEncoding(x, y, width, height int) error {
	// TODO: Decode the RRE-compressed data.
	return fmt.Errorf("VNC RRE encoding not implemented")
}
// decodeHextileEncoding handles a rectangle sent in Hextile encoding.
//
// Stub: always returns a "not implemented" error.
func (v *VNCSource) decodeHextileEncoding(x, y, width, height int) error {
	// TODO: Decode the Hextile-compressed data.
	return fmt.Errorf("VNC Hextile encoding not implemented")
}

View file

@ -0,0 +1,166 @@
//go:build linux
// Wayland screen capture using PipeWire and xdg-desktop-portal.
package backends
import (
"fmt"
"image"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// WaylandConfig holds configuration for Wayland screen capture.
//
// NewWaylandSource fills it from a generic configuration map and applies the
// defaults noted on each field.
type WaylandConfig struct {
	// Output is the Wayland output name to capture (e.g., "DP-1", "HDMI-A-1").
	// If empty, captures the primary output.
	Output string `yaml:"output"`
	// UsePipeWire enables PipeWire-based capture for better performance
	// (default true).
	// Falls back to xdg-desktop-portal screencasting if false.
	UsePipeWire bool `yaml:"use_pipewire"`
	// WindowTitle attempts to capture a specific window (if supported).
	// Note: Wayland has limited window-specific capture due to security model.
	WindowTitle string `yaml:"window_title"`
	// Framerate sets the desired capture framerate (default 30).
	Framerate int `yaml:"framerate"`
}
// WaylandSource captures screen content on Wayland using PipeWire/xdg-desktop-portal.
type WaylandSource struct {
	config      WaylandConfig // settings supplied to NewWaylandSource
	pipeWireCtx uintptr       // PipeWire context
	stream      uintptr       // PipeWire stream
	// width and height are what Size reports. Nothing visible sets them yet.
	width  int
	height int
	active bool // whether a capture session has been started; cleared by Close
}
// NewWaylandSource builds a Wayland screen capture source from a generic
// configuration map.
//
// Recognized keys: "output" (string), "use_pipewire" (bool, default true),
// "window_title" (string) and "framerate" (int, default 30). A key that is
// missing or has another type keeps its default. It never returns an error.
func NewWaylandSource(configMap map[string]interface{}) (capture.Source, error) {
	// Start from the defaults and overlay whatever the map supplies.
	cfg := WaylandConfig{
		UsePipeWire: true,
		Framerate:   30,
	}

	if v, ok := configMap["output"].(string); ok {
		cfg.Output = v
	}
	if v, ok := configMap["use_pipewire"].(bool); ok {
		cfg.UsePipeWire = v
	}
	if v, ok := configMap["window_title"].(string); ok {
		cfg.WindowTitle = v
	}
	if v, ok := configMap["framerate"].(int); ok {
		cfg.Framerate = v
	}

	source := &WaylandSource{config: cfg}
	return source, nil
}
// Name describes this capture source. The configured output wins over the
// window title; a generic label is used when neither is set.
func (w *WaylandSource) Name() string {
	switch cfg := w.config; {
	case cfg.Output != "":
		return fmt.Sprintf("Wayland Output: %s", cfg.Output)
	case cfg.WindowTitle != "":
		return fmt.Sprintf("Wayland Window: %s", cfg.WindowTitle)
	default:
		return "Wayland Screen Capture"
	}
}
// Capture grabs a single frame from Wayland, starting the capture session
// first when it is not active yet.
//
// Frame retrieval is a stub: once the session is active, it always returns a
// "not implemented" error.
func (w *WaylandSource) Capture() (image.Image, error) {
	// Start the session lazily on first use.
	if needsStart := !w.active; needsStart {
		err := w.startCapture()
		if err != nil {
			return nil, fmt.Errorf("failed to start Wayland capture: %w", err)
		}
	}

	// TODO: Implement Wayland screen capture:
	//   - request screen sharing through the xdg-desktop-portal ScreenCast interface
	//   - obtain the PipeWire stream from the portal response
	//   - connect to the PipeWire stream and read video buffers
	//   - convert the PipeWire buffer to a Go image.Image
	return nil, fmt.Errorf("Wayland capture not implemented yet")
}
// CaptureRegion grabs the sub-region r of the screen.
//
// Wayland doesn't support partial capture, so the full frame is captured and
// then cropped. It returns the error from Capture unchanged, or an error when
// the captured frame type cannot be cropped (it has no SubImage method).
func (w *WaylandSource) CaptureRegion(r capture.Region) (image.Image, error) {
	fullFrame, err := w.Capture()
	if err != nil {
		return nil, err
	}

	// Not every image.Image implements SubImage. Check instead of asserting,
	// so an unexpected frame type is an error rather than a panic.
	cropper, ok := fullFrame.(interface {
		SubImage(r image.Rectangle) image.Image
	})
	if !ok {
		return nil, fmt.Errorf("captured frame of type %T does not support cropping", fullFrame)
	}

	// Crop the image to the specified region.
	bounds := image.Rect(r.X, r.Y, r.X+r.Width, r.Y+r.Height)
	return cropper.SubImage(bounds), nil
}
// Size reports the stored screen width and height.
func (w *WaylandSource) Size() (width, height int) {
	// TODO: Query the actual screen size from the Wayland output info.
	width = w.width
	height = w.height
	return width, height
}
// Close marks the capture session inactive. It always returns nil.
func (w *WaylandSource) Close() error {
	// TODO: Close the PipeWire stream and context.
	w.active = false
	return nil
}
// startCapture initiates the screen capture session.
//
// Stub: always returns a "not implemented" error. The source is deliberately
// left inactive on failure, so callers never treat an uninitialized session
// as running.
func (w *WaylandSource) startCapture() error {
	// TODO: Implement capture initialization
	// 1. Connect to xdg-desktop-portal via D-Bus
	// 2. Call CreateSession on org.freedesktop.portal.ScreenCast interface
	// 3. Configure capture source (screen or window)
	// 4. Start capture session
	// 5. Extract PipeWire node ID from response
	// 6. Connect to PipeWire and setup stream callbacks
	// 7. Set w.active = true only after all of the above succeeded
	return fmt.Errorf("Wayland capture initialization not implemented")
}
// initPipeWire sets up the PipeWire connection and stream for nodeID.
//
// Stub: always returns a "not implemented" error.
func (w *WaylandSource) initPipeWire(nodeID uint32) error {
	// TODO: Implement PipeWire initialization:
	//   - create the PipeWire main loop and context
	//   - create a stream with video format constraints
	//   - connect the stream to the portal-provided node
	//   - set up stream callbacks for buffer processing
	return fmt.Errorf("PipeWire initialization not implemented")
}
// requestScreenCast asks xdg-desktop-portal for screen access.
//
// Stub: always returns 0 and a "not implemented" error.
func (w *WaylandSource) requestScreenCast() (uint32, error) {
	// TODO: Implement D-Bus communication with xdg-desktop-portal:
	//   - create a D-Bus connection
	//   - call org.freedesktop.portal.ScreenCast.CreateSession
	//   - call SelectSources with the appropriate source types
	//   - call Start to begin capture
	//   - parse the response to get the PipeWire node ID
	return 0, fmt.Errorf("ScreenCast portal request not implemented")
}
// processPipeWireBuffer turns a PipeWire video buffer into an image.
//
// Stub: always returns a nil image and a "not implemented" error.
func (w *WaylandSource) processPipeWireBuffer(buffer uintptr) (image.Image, error) {
	// TODO: Implement buffer processing:
	//   - extract video format info (width, height, stride, pixel format)
	//   - map the buffer memory
	//   - convert the pixel data to image.RGBA
	//   - handle the different pixel formats (BGRA, RGBA, etc.)
	return nil, fmt.Errorf("PipeWire buffer processing not implemented")
}

View file

@ -0,0 +1,129 @@
//go:build windows
// Windows window capture using BitBlt and DXGI APIs.
package backends
import (
"fmt"
"image"
"unsafe"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// Win32Config holds configuration for Windows window capture.
//
// NewWin32Source requires at least one of WindowTitle and WindowClass.
type Win32Config struct {
	// WindowTitle is the exact window title to capture.
	WindowTitle string `yaml:"window_title"`
	// WindowClass is the window class name (alternative to title).
	WindowClass string `yaml:"window_class"`
	// UseDirectX enables DXGI-based capture for better performance.
	// Falls back to GDI BitBlt if DXGI fails.
	UseDirectX bool `yaml:"use_directx"`
	// CaptureCursor includes the mouse cursor in captured frames.
	CaptureCursor bool `yaml:"capture_cursor"`
}
// Win32Source captures from a Windows window using Win32 APIs.
type Win32Source struct {
	config     Win32Config // settings supplied to NewWin32Source
	windowHWND uintptr     // NOTE(review): presumably the target window handle; not yet set by visible code
	// width and height are what Size reports. Nothing visible sets them yet.
	width  int
	height int
}
// NewWin32Source builds a Windows window capture source from a generic
// configuration map.
//
// Recognized keys: "window_title" (string), "window_class" (string),
// "use_directx" (bool) and "capture_cursor" (bool). Keys that are missing or
// have another type are left at their zero value. It returns an error when
// neither a window title nor a window class is provided.
func NewWin32Source(configMap map[string]interface{}) (capture.Source, error) {
	cfg := Win32Config{}

	if v, ok := configMap["window_title"].(string); ok {
		cfg.WindowTitle = v
	}
	if v, ok := configMap["window_class"].(string); ok {
		cfg.WindowClass = v
	}
	if v, ok := configMap["use_directx"].(bool); ok {
		cfg.UseDirectX = v
	}
	if v, ok := configMap["capture_cursor"].(bool); ok {
		cfg.CaptureCursor = v
	}

	// The window must be identifiable by at least one of title or class.
	hasTarget := cfg.WindowTitle != "" || cfg.WindowClass != ""
	if !hasTarget {
		return nil, fmt.Errorf("either window_title or window_class must be specified")
	}

	source := &Win32Source{config: cfg}
	return source, nil
}
// Name describes this capture source by window title, or by window class
// when no title is configured.
func (w *Win32Source) Name() string {
	if title := w.config.WindowTitle; title != "" {
		return fmt.Sprintf("Win32 Window: %s", title)
	}
	class := w.config.WindowClass
	return fmt.Sprintf("Win32 Window Class: %s", class)
}
// Capture grabs a single frame from the window.
//
// Stub: always returns a nil image and a "not implemented" error.
func (w *Win32Source) Capture() (image.Image, error) {
	// TODO: Implement Win32 window capture:
	//   - find the window handle using FindWindow/FindWindowEx
	//   - get the window dimensions with GetWindowRect
	//   - create a compatible DC and bitmap
	//   - capture the window content with BitBlt or DXGI
	//   - convert the result to a Go image.Image
	return nil, fmt.Errorf("Win32 capture not implemented yet")
}
// CaptureRegion grabs the sub-region r of the window.
//
// Stub: always returns a nil image and a "not implemented" error.
func (w *Win32Source) CaptureRegion(r capture.Region) (image.Image, error) {
	// TODO: Implement region capture using BitBlt with the source
	// coordinates offset by region.X, region.Y.
	return nil, fmt.Errorf("Win32 region capture not implemented yet")
}
// Size reports the stored window width and height.
func (w *Win32Source) Size() (width, height int) {
	// TODO: Query the actual window size with GetWindowRect.
	width = w.width
	height = w.height
	return width, height
}
// Close is meant to release Win32 resources. It currently does nothing and
// always returns nil.
func (w *Win32Source) Close() error {
	// TODO: Release DCs, bitmaps, and any other Win32 handles held.
	return nil
}
// findWindow looks up the target window by title or class name.
//
// Stub: always returns 0 and a "not implemented" error.
func (w *Win32Source) findWindow() (uintptr, error) {
	// TODO: Use the FindWindow/FindWindowEx APIs, supporting both the window
	// title and the class name search.
	return 0, fmt.Errorf("window lookup not implemented")
}
// getDXGIOutput captures a frame through the DXGI Desktop Duplication API.
//
// Stub: always returns a nil image and a "not implemented" error.
func (w *Win32Source) getDXGIOutput() (image.Image, error) {
	// TODO: Implement DXGI Desktop Duplication:
	//   - create the DXGI factory and enumerate adapters/outputs
	//   - create the output duplication interface
	//   - acquire the next frame
	//   - map the texture and copy it into a Go image
	return nil, fmt.Errorf("DXGI capture not implemented")
}
// getBitBltOutput captures a frame with traditional GDI BitBlt.
//
// Stub: always returns a nil image and a "not implemented" error.
func (w *Win32Source) getBitBltOutput() (image.Image, error) {
	// TODO: Implement BitBlt capture:
	//   - get the window DC with GetWindowDC
	//   - create a compatible DC and bitmap
	//   - BitBlt the window content into the bitmap
	//   - read the bitmap bits and convert them to image.RGBA
	return nil, fmt.Errorf("BitBlt capture not implemented")
}

View file

@ -0,0 +1,137 @@
//go:build linux
// X11 window capture using XGetImage.
package backends
import (
"fmt"
"image"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// X11Config holds configuration for X11 window capture.
//
// NewX11Source requires at least one of WindowTitle and WindowClass.
type X11Config struct {
	// WindowTitle is the window title to search for.
	WindowTitle string `yaml:"window_title"`
	// WindowClass is the WM_CLASS property to match.
	WindowClass string `yaml:"window_class"`
	// Display is the X11 display to connect to (e.g., ":0"). NewX11Source
	// defaults it to ":0".
	Display string `yaml:"display"`
	// IncludeDecorations captures window decorations (title bar, borders).
	IncludeDecorations bool `yaml:"include_decorations"`
}
// X11Source captures from an X11 window using XGetImage.
type X11Source struct {
	config  X11Config // settings supplied to NewX11Source
	display uintptr   // *Display
	window  uint64    // Window ID
	// width and height are what Size reports. Nothing visible sets them yet.
	width  int
	height int
}
// NewX11Source builds an X11 window capture source from a generic
// configuration map.
//
// Recognized keys: "window_title" (string), "window_class" (string),
// "display" (string, default ":0") and "include_decorations" (bool). Keys
// that are missing or have another type keep their default. It returns an
// error when neither a window title nor a window class is provided.
func NewX11Source(configMap map[string]interface{}) (capture.Source, error) {
	// Start from the defaults and overlay whatever the map supplies.
	cfg := X11Config{Display: ":0"}

	if v, ok := configMap["window_title"].(string); ok {
		cfg.WindowTitle = v
	}
	if v, ok := configMap["window_class"].(string); ok {
		cfg.WindowClass = v
	}
	if v, ok := configMap["display"].(string); ok {
		cfg.Display = v
	}
	if v, ok := configMap["include_decorations"].(bool); ok {
		cfg.IncludeDecorations = v
	}

	// The window must be identifiable by at least one of title or class.
	hasTarget := cfg.WindowTitle != "" || cfg.WindowClass != ""
	if !hasTarget {
		return nil, fmt.Errorf("either window_title or window_class must be specified")
	}

	source := &X11Source{config: cfg}
	return source, nil
}
// Name describes this capture source by window title, or by window class
// when no title is configured.
func (x *X11Source) Name() string {
	if title := x.config.WindowTitle; title != "" {
		return fmt.Sprintf("X11 Window: %s", title)
	}
	class := x.config.WindowClass
	return fmt.Sprintf("X11 Window Class: %s", class)
}
// Capture grabs a single frame from the X11 window.
//
// Stub: always returns a nil image and a "not implemented" error.
func (x *X11Source) Capture() (image.Image, error) {
	// TODO: Implement X11 window capture:
	//   - open the X11 display connection
	//   - find the window by title/class using XQueryTree and property queries
	//   - get the window geometry with XGetGeometry
	//   - capture the window content with XGetImage
	//   - convert the XImage to a Go image.RGBA
	return nil, fmt.Errorf("X11 capture not implemented yet")
}
// CaptureRegion grabs the sub-region r of the X11 window.
//
// Stub: always returns a nil image and a "not implemented" error.
func (x *X11Source) CaptureRegion(r capture.Region) (image.Image, error) {
	// TODO: Implement region capture by passing the region's x, y, width and
	// height to XGetImage.
	return nil, fmt.Errorf("X11 region capture not implemented yet")
}
// Size reports the stored window width and height.
func (x *X11Source) Size() (width, height int) {
	// TODO: Query the actual window size with XGetGeometry.
	width = x.width
	height = x.height
	return width, height
}
// Close is meant to release X11 resources. It currently does nothing and
// always returns nil.
func (x *X11Source) Close() error {
	// TODO: Close the X11 display connection and free associated resources.
	return nil
}
// connectToDisplay opens a connection to the X11 display.
//
// Stub: always returns a "not implemented" error.
func (x *X11Source) connectToDisplay() error {
	// TODO: Connect to the X server with XOpenDisplay and keep the resulting
	// display pointer in x.display.
	return fmt.Errorf("X11 display connection not implemented")
}
// findWindow searches for the target window by title or class.
//
// Stub: always returns 0 and a "not implemented" error.
func (x *X11Source) findWindow() (uint64, error) {
	// TODO: Implement the window search:
	//   - get the root window with XDefaultRootWindow
	//   - walk the window tree recursively with XQueryTree
	//   - for each window, check _NET_WM_NAME (title) and WM_CLASS
	//   - return the matching window ID
	return 0, fmt.Errorf("X11 window search not implemented")
}
// getWindowGeometry retrieves the window position and size, in the order
// x, y, width, height.
//
// Stub: always returns zeros and a "not implemented" error.
func (x *X11Source) getWindowGeometry() (int, int, int, int, error) {
	// TODO: Obtain the window bounds with XGetGeometry.
	return 0, 0, 0, 0, fmt.Errorf("X11 geometry query not implemented")
}
// captureWindowImage captures the window content and returns it as an image.
//
// Stub: always returns a nil image and a "not implemented" error.
func (x *X11Source) captureWindowImage() (image.Image, error) {
	// TODO: Implement XGetImage capture:
	//   - get the window geometry
	//   - call XGetImage(display, window, x, y, width, height, AllPlanes, ZPixmap)
	//   - convert the XImage pixel data to image.RGBA
	//   - handle the different bit depths and byte orders
	return nil, fmt.Errorf("X11 image capture not implemented")
}

View file

@ -0,0 +1,283 @@
// Package resolution provides a profile system for managing screen regions across different resolutions.
//
// Each game plugin can register resolution profiles that define named screen regions
// for different resolutions. This allows the engine to work with games at various
// resolutions without hardcoding pixel coordinates.
package resolution
import (
"fmt"
"image"
"sort"
)
// Profile defines screen regions for a specific resolution.
//
// Regions maps a region name to its rectangle in pixel coordinates.
// ValidateProfile requires every rectangle to be non-empty and to lie within
// the Width x Height screen.
type Profile struct {
	Width   int                        `yaml:"width"`   // Screen width in pixels
	Height  int                        `yaml:"height"`  // Screen height in pixels
	Label   string                     `yaml:"label"`   // Human-readable label (e.g., "1080p", "720p")
	Regions map[string]image.Rectangle `yaml:"regions"` // Named screen regions
}
// Registry holds profiles per game per resolution.
//
// Create one with NewRegistry.
type Registry struct {
	// profiles[gameID][resolution_key] = Profile
	// The resolution key is the "WIDTHxHEIGHT" string built by resolutionKey.
	profiles map[string]map[string]*Profile
}
// NewRegistry returns an empty resolution profile registry that is ready to
// accept registrations.
func NewRegistry() *Registry {
	registry := new(Registry)
	registry.profiles = make(map[string]map[string]*Profile)
	return registry
}
// Register adds a resolution profile for a specific game.
//
// A profile already registered for the same game and resolution is replaced.
// The registry stores the pointer it is given, so later changes to the
// profile are visible through the registry. Register also works on a
// zero-value Registry.
//
// It returns an error when profile is nil or its resolution is not positive.
func (r *Registry) Register(gameID string, profile *Profile) error {
	if profile == nil {
		return fmt.Errorf("profile cannot be nil")
	}
	if profile.Width <= 0 || profile.Height <= 0 {
		return fmt.Errorf("invalid resolution: %dx%d", profile.Width, profile.Height)
	}

	// A Registry not built by NewRegistry has a nil map, and writing to a nil
	// map panics. Allocate it on demand.
	if r.profiles == nil {
		r.profiles = make(map[string]map[string]*Profile)
	}
	gameProfiles := r.profiles[gameID]
	if gameProfiles == nil {
		gameProfiles = make(map[string]*Profile)
		r.profiles[gameID] = gameProfiles
	}

	gameProfiles[resolutionKey(profile.Width, profile.Height)] = profile
	return nil
}
// Get looks up the profile registered for gameID at width x height.
//
// It returns an error when the game has no profiles at all, or none for the
// requested resolution.
func (r *Registry) Get(gameID string, width, height int) (*Profile, error) {
	byResolution, known := r.profiles[gameID]
	if !known {
		return nil, fmt.Errorf("no profiles registered for game %q", gameID)
	}

	if found, ok := byResolution[resolutionKey(width, height)]; ok {
		return found, nil
	}
	return nil, fmt.Errorf("no profile for game %q at resolution %dx%d", gameID, width, height)
}
// GetRegion looks up the named region of the profile registered for gameID
// at width x height.
//
// It returns the zero rectangle and an error when the profile lookup fails
// or the profile has no region with that name.
func (r *Registry) GetRegion(gameID string, width, height int, regionName string) (image.Rectangle, error) {
	var none image.Rectangle

	profile, err := r.Get(gameID, width, height)
	if err != nil {
		return none, err
	}

	if rect, ok := profile.Regions[regionName]; ok {
		return rect, nil
	}
	return none, fmt.Errorf("region %q not found in profile for %s at %dx%d", regionName, gameID, width, height)
}
// SupportedResolutions lists the resolutions registered for gameID as
// (width, height) points, ordered by width and then by height. It returns nil
// for an unknown game.
func (r *Registry) SupportedResolutions(gameID string) []image.Point {
	byResolution, known := r.profiles[gameID]
	if !known {
		return nil
	}

	var points []image.Point
	for _, p := range byResolution {
		points = append(points, image.Pt(p.Width, p.Height))
	}

	// Map iteration order is random, so sort for a stable result.
	sort.Slice(points, func(a, b int) bool {
		left, right := points[a], points[b]
		if left.X == right.X {
			return left.Y < right.Y
		}
		return left.X < right.X
	})
	return points
}
// ListGames lists, in alphabetical order, the IDs of all games with at least
// one registration.
func (r *Registry) ListGames() []string {
	var ids []string
	for id := range r.profiles {
		ids = append(ids, id)
	}
	// Map iteration order is random, so sort for a stable result.
	sort.Strings(ids)
	return ids
}
// GetProfiles lists every profile registered for gameID, ordered by width and
// then by height. It returns nil for an unknown game. The returned pointers
// are the registered profiles themselves, not copies.
func (r *Registry) GetProfiles(gameID string) []*Profile {
	byResolution, known := r.profiles[gameID]
	if !known {
		return nil
	}

	var list []*Profile
	for _, p := range byResolution {
		list = append(list, p)
	}

	// Map iteration order is random, so sort for a stable result.
	sort.Slice(list, func(a, b int) bool {
		left, right := list[a], list[b]
		if left.Width == right.Width {
			return left.Height < right.Height
		}
		return left.Width < right.Width
	})
	return list
}
// ValidateProfile reports whether profile is valid and complete.
//
// A valid profile is non-nil, has a positive resolution, a non-empty label
// and a non-nil region map whose rectangles are all non-empty and contained
// in the screen. The first violation found is returned as an error.
func (r *Registry) ValidateProfile(profile *Profile) error {
	switch {
	case profile == nil:
		return fmt.Errorf("profile is nil")
	case profile.Width <= 0 || profile.Height <= 0:
		return fmt.Errorf("invalid resolution: %dx%d", profile.Width, profile.Height)
	case profile.Label == "":
		return fmt.Errorf("profile label is required")
	case profile.Regions == nil:
		return fmt.Errorf("profile regions map is nil")
	}

	// Every region has to fit on the screen and cover at least one pixel.
	screen := image.Rect(0, 0, profile.Width, profile.Height)
	for name, rect := range profile.Regions {
		// An empty rectangle is "in" every rectangle, so testing emptiness
		// first reports the same error the containment-first order would.
		if rect.Empty() {
			return fmt.Errorf("region %q is empty: %v", name, rect)
		}
		if !rect.In(screen) {
			return fmt.Errorf("region %q is outside screen bounds: %v not in %v", name, rect, screen)
		}
	}
	return nil
}
// RegisterMultiple registers multiple profiles for a game.
//
// All profiles are validated before any is registered, so an invalid entry
// leaves the registry unchanged. It returns an error naming the first profile
// that is nil, fails ValidateProfile, or cannot be registered.
func (r *Registry) RegisterMultiple(gameID string, profiles []*Profile) error {
	for i, profile := range profiles {
		// A nil entry has no label to report. Handle it before the error
		// formatting below would dereference it.
		if profile == nil {
			return fmt.Errorf("invalid profile at index %d: profile is nil", i)
		}
		if err := r.ValidateProfile(profile); err != nil {
			return fmt.Errorf("invalid profile %s: %w", profile.Label, err)
		}
	}

	for _, profile := range profiles {
		if err := r.Register(gameID, profile); err != nil {
			return fmt.Errorf("failed to register profile %s: %w", profile.Label, err)
		}
	}
	return nil
}
// Clone creates a deep copy of a profile.
//
// The copy has its own region map, so changing one profile's regions does not
// affect the other. The clone's map is always non-nil, even when the source
// map is nil. Cloning a nil profile returns nil.
func (p *Profile) Clone() *Profile {
	if p == nil {
		return nil
	}

	clone := &Profile{
		Width:   p.Width,
		Height:  p.Height,
		Label:   p.Label,
		Regions: make(map[string]image.Rectangle, len(p.Regions)),
	}
	// image.Rectangle is a plain value type, so copying entries is a deep copy.
	for name, region := range p.Regions {
		clone.Regions[name] = region
	}
	return clone
}
// HasRegion reports whether the profile defines a region called name.
func (p *Profile) HasRegion(name string) bool {
	if _, ok := p.Regions[name]; ok {
		return true
	}
	return false
}
// ListRegions lists the profile's region names in alphabetical order.
func (p *Profile) ListRegions() []string {
	var keys []string
	for key := range p.Regions {
		keys = append(keys, key)
	}
	// Map iteration order is random, so sort for a stable result.
	sort.Strings(keys)
	return keys
}
// AspectRatio returns the profile's width divided by its height.
func (p *Profile) AspectRatio() float64 {
	w, h := float64(p.Width), float64(p.Height)
	return w / h
}
// IsWidescreen reports whether the profile's aspect ratio is 16:9 or wider.
func (p *Profile) IsWidescreen() bool {
	const sixteenByNine = 16.0 / 9.0
	ratio := p.AspectRatio()
	return ratio >= sixteenByNine
}
// ScaleFrom creates a new profile by scaling regions from another resolution.
//
// The result has p's width, height and label, with every region of source
// scaled from source's resolution to p's. Coordinates are truncated toward
// zero. It returns nil when source is nil or has a non-positive width or
// height, since no scale factor exists in that case.
func (p *Profile) ScaleFrom(source *Profile) *Profile {
	if source == nil || source.Width <= 0 || source.Height <= 0 {
		return nil
	}

	scaled := &Profile{
		Width:   p.Width,
		Height:  p.Height,
		Label:   p.Label,
		Regions: make(map[string]image.Rectangle, len(source.Regions)),
	}

	// Multiply before dividing, in integer arithmetic. A floating-point
	// product can fall just below an exact integer, and truncating that
	// would land one pixel short.
	for name, region := range source.Regions {
		scaled.Regions[name] = image.Rect(
			region.Min.X*p.Width/source.Width,
			region.Min.Y*p.Height/source.Height,
			region.Max.X*p.Width/source.Width,
			region.Max.Y*p.Height/source.Height,
		)
	}
	return scaled
}
// resolutionKey builds the "WIDTHxHEIGHT" map key identifying a resolution.
func resolutionKey(width, height int) string {
	const layout = "%dx%d"
	return fmt.Sprintf(layout, width, height)
}
// ParseResolution parses a resolution string like "1920x1080".
//
// The input must be exactly WIDTHxHEIGHT with positive decimal integers.
// Surrounding whitespace, explicit plus signs, leading zeros and trailing
// characters (e.g. "1920x1080abc") are rejected. On error the returned width
// and height are both 0.
func ParseResolution(s string) (width, height int, err error) {
	if _, err := fmt.Sscanf(s, "%dx%d", &width, &height); err != nil {
		return 0, 0, fmt.Errorf("invalid resolution format %q: %w", s, err)
	}
	// Sscanf stops once the format is satisfied and ignores what follows.
	// Re-formatting the parsed values and comparing with the input is what
	// rejects trailing or otherwise non-canonical text.
	if fmt.Sprintf("%dx%d", width, height) != s {
		return 0, 0, fmt.Errorf("invalid resolution format %q: expected WIDTHxHEIGHT", s)
	}
	if width <= 0 || height <= 0 {
		return 0, 0, fmt.Errorf("invalid resolution: %dx%d", width, height)
	}
	return width, height, nil
}
// FormatResolution renders width and height as a "WIDTHxHEIGHT" string.
func FormatResolution(width, height int) string {
	const layout = "%dx%d"
	return fmt.Sprintf(layout, width, height)
}

View file

@ -7,6 +7,8 @@ package plugin
import (
"context"
"image"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/capture"
)
// GameState represents the current state of the game (menu, loading, in-game, etc.)
@ -109,6 +111,9 @@ type Plugin interface {
// DefaultLootFilter returns the default loot filter.
DefaultLootFilter() LootFilter
// SupportedResolutions returns resolutions available for this game.
SupportedResolutions() []image.Point
}
// PluginInfo describes a game plugin.
@ -125,6 +130,15 @@ type EngineServices interface {
// Capture returns the current screen frame.
Capture() image.Image
// CaptureSource returns the active capture source.
CaptureSource() capture.Source
// Resolution returns the current capture resolution.
Resolution() (width, height int)
// Region returns a named screen region for the current game and resolution.
Region(name string) image.Rectangle
// Click sends a mouse click at the given position.
Click(pos image.Point)

View file

@ -1,20 +1,11 @@
// D2R-specific configuration: screen regions, colors, timings.
// D2R-specific configuration: colors, timings, and resolution profiles.
package d2r
import "image"
import (
"image"
// ScreenRegions defines UI element positions at 1920x1080.
type ScreenRegions struct {
HealthOrb image.Rectangle
ManaOrb image.Rectangle
XPBar image.Rectangle
Belt image.Rectangle
Minimap image.Rectangle
Inventory image.Rectangle
Stash image.Rectangle
SkillLeft image.Rectangle
SkillRight image.Rectangle
}
"git.cloonar.com/openclawd/iso-bot/pkg/engine/resolution"
)
// HSVRange defines a color range in HSV space.
type HSVRange struct {
@ -44,10 +35,8 @@ type Timings struct {
// Config holds all D2R-specific configuration.
type Config struct {
Resolution image.Point
Regions ScreenRegions
Colors Colors
Timings Timings
Colors Colors
Timings Timings
// Loot settings
PickupUniques bool
@ -63,21 +52,9 @@ type Config struct {
ChickenThreshold float64 // exit game if health below this
}
// DefaultConfig returns the default D2R config for 1920x1080.
// DefaultConfig returns the default D2R configuration.
func DefaultConfig() Config {
return Config{
Resolution: image.Point{X: 1920, Y: 1080},
Regions: ScreenRegions{
HealthOrb: image.Rect(28, 545, 198, 715),
ManaOrb: image.Rect(1722, 545, 1892, 715),
XPBar: image.Rect(0, 1058, 1920, 1080),
Belt: image.Rect(838, 1010, 1082, 1058),
Minimap: image.Rect(1600, 0, 1920, 320),
Inventory: image.Rect(960, 330, 1490, 770),
Stash: image.Rect(430, 330, 960, 770),
SkillLeft: image.Rect(194, 1036, 246, 1088),
SkillRight: image.Rect(1674, 1036, 1726, 1088),
},
Colors: Colors{
HealthFilled: HSVRange{0, 100, 100, 10, 255, 255},
ManaFilled: HSVRange{100, 100, 100, 130, 255, 255},
@ -105,3 +82,45 @@ func DefaultConfig() Config {
ChickenThreshold: 0.2,
}
}
// RegisterProfiles registers D2R resolution profiles with the resolution registry.
//
// Two profiles are registered under the game ID "d2r": 1920x1080 and
// 1280x720. It returns the error from the registry when a profile is
// rejected.
func RegisterProfiles(registry *resolution.Registry) error {
	// Every region must lie inside its screen: RegisterMultiple validates
	// each profile and rejects any region that extends past the screen
	// bounds. The skill button rectangles are therefore clipped at the bottom
	// edge of the screen (they previously ended at y=1088 on a 1080-high
	// screen and y=726 on a 720-high one, which failed validation).
	profiles := []*resolution.Profile{
		// 1920x1080 (1080p) - Primary resolution
		{
			Width:  1920,
			Height: 1080,
			Label:  "1080p",
			Regions: map[string]image.Rectangle{
				"health_orb":  image.Rect(28, 545, 198, 715),
				"mana_orb":    image.Rect(1722, 545, 1892, 715),
				"xp_bar":      image.Rect(0, 1058, 1920, 1080),
				"belt":        image.Rect(838, 1010, 1082, 1058),
				"minimap":     image.Rect(1600, 0, 1920, 320),
				"inventory":   image.Rect(960, 330, 1490, 770),
				"stash":       image.Rect(430, 330, 960, 770),
				"skill_left":  image.Rect(194, 1036, 246, 1080),
				"skill_right": image.Rect(1674, 1036, 1726, 1080),
			},
		},
		// 1280x720 (720p) - Secondary resolution
		{
			Width:  1280,
			Height: 720,
			Label:  "720p",
			Regions: map[string]image.Rectangle{
				"health_orb":  image.Rect(19, 363, 132, 477),
				"mana_orb":    image.Rect(1148, 363, 1261, 477),
				"xp_bar":      image.Rect(0, 705, 1280, 720),
				"belt":        image.Rect(559, 673, 721, 705),
				"minimap":     image.Rect(1067, 0, 1280, 213),
				"inventory":   image.Rect(640, 220, 993, 513),
				"stash":       image.Rect(287, 220, 640, 513),
				"skill_left":  image.Rect(129, 691, 164, 720),
				"skill_right": image.Rect(1116, 691, 1151, 720),
			},
		},
	}

	return registry.RegisterMultiple("d2r", profiles)
}

View file

@ -9,12 +9,16 @@ import (
// Detector implements plugin.GameDetector for D2R.
//
// It carries the D2R configuration and the engine services handle; the
// services handle is what ReadVitals uses to look up named screen regions.
// NOTE(review): the config field is listed twice here because this span is a
// rendered diff showing the old and new field lines together.
type Detector struct {
config Config
config Config
services plugin.EngineServices
}
// NewDetector creates a D2R state detector.
//
// The two-argument form stores both the given config and the engine services
// on the returned Detector; the one-argument form stores only the config.
// NOTE(review): both forms appear back to back because this span is a
// rendered diff (old signature followed by its replacement).
func NewDetector(config Config) *Detector {
return &Detector{config: config}
func NewDetector(config Config, services plugin.EngineServices) *Detector {
return &Detector{
config: config,
services: services,
}
}
// DetectState analyzes a screenshot and returns the current game state.
@ -48,6 +52,13 @@ func (d *Detector) DetectState(frame image.Image) plugin.GameState {
// ReadVitals reads health and mana from the orbs.
//
// Currently a stub: it looks up the "health_orb" and "mana_orb" regions via
// the engine services, does not inspect frame, and always returns an empty
// plugin.VitalStats.
func (d *Detector) ReadVitals(frame image.Image) plugin.VitalStats {
	// TODO: Analyze health/mana orb regions using color detection.
	// The region lookups are performed now and discarded until that
	// analysis is implemented.
	var (
		healthOrb = d.services.Region("health_orb")
		manaOrb   = d.services.Region("mana_orb")
	)
	_, _ = healthOrb, manaOrb

	return plugin.VitalStats{}
}

View file

@ -2,23 +2,27 @@
package d2r
import (
"fmt"
"image"
"git.cloonar.com/openclawd/iso-bot/pkg/plugin"
"git.cloonar.com/openclawd/iso-bot/pkg/engine/resolution"
)
// Plugin implements plugin.Plugin for D2R.
//
// NOTE(review): the first four fields appear twice because this span is a
// rendered diff showing the old and new field lines together.
type Plugin struct {
config Config
services plugin.EngineServices
detector *Detector
reader *Reader
config Config
services plugin.EngineServices
detector *Detector
reader *Reader
// resolutionRegistry receives the D2R resolution profiles in Init and is
// queried by SupportedResolutions.
resolutionRegistry *resolution.Registry
}
// New builds a D2R plugin populated with DefaultConfig and a newly created
// resolution registry. Engine services, the detector and the reader are left
// unset here.
func New() *Plugin {
	p := new(Plugin)
	p.config = DefaultConfig()
	p.resolutionRegistry = resolution.NewRegistry()
	return p
}
@ -36,8 +40,14 @@ func (p *Plugin) Info() plugin.PluginInfo {
// Init initializes the plugin with engine services.
//
// It stores services on the plugin, registers the D2R resolution profiles in
// the plugin's registry, and builds the detector and reader. If profile
// registration fails, the error is wrapped and returned; otherwise Init
// returns nil.
// NOTE(review): the detector/reader assignments appear twice because this
// span is a rendered diff (old one-argument calls plus their replacements).
func (p *Plugin) Init(services plugin.EngineServices) error {
p.services = services
p.detector = NewDetector(p.config)
p.reader = NewReader(p.config)
// Register D2R resolution profiles
if err := RegisterProfiles(p.resolutionRegistry); err != nil {
return fmt.Errorf("failed to register D2R profiles: %w", err)
}
// Detector and reader both receive the config and the engine services.
p.detector = NewDetector(p.config, services)
p.reader = NewReader(p.config, services)
return nil
}
@ -63,3 +73,8 @@ func (p *Plugin) DefaultLootFilter() plugin.LootFilter {
// TODO: Return default rule engine
return nil
}
// SupportedResolutions reports the resolutions that the plugin's resolution
// registry returns for the "d2r" game key.
func (p *Plugin) SupportedResolutions() []image.Point {
	supported := p.resolutionRegistry.SupportedResolutions("d2r")
	return supported
}

View file

@ -9,12 +9,16 @@ import (
// Reader implements plugin.ScreenReader for D2R.
//
// It carries the D2R configuration and the engine services handle.
// NOTE(review): the config field is listed twice here because this span is a
// rendered diff showing the old and new field lines together.
type Reader struct {
config Config
config Config
services plugin.EngineServices
}
// NewReader creates a D2R screen reader.
//
// The two-argument form stores both the given config and the engine services
// on the returned Reader; the one-argument form stores only the config.
// NOTE(review): both forms appear back to back because this span is a
// rendered diff (old signature followed by its replacement).
func NewReader(config Config) *Reader {
return &Reader{config: config}
func NewReader(config Config, services plugin.EngineServices) *Reader {
return &Reader{
config: config,
services: services,
}
}
// FindItems detects item labels on the ground.