Initial project structure: reusable isometric bot engine with D2R implementation

This commit is contained in:
Hoid 2026-02-14 08:50:36 +00:00
commit e0282a7111
44 changed files with 3433 additions and 0 deletions

35
engine/__init__.py Normal file
View file

@ -0,0 +1,35 @@
"""ISO Bot Engine - Core reusable components for isometric game bots.
This module provides the fundamental building blocks for creating bots that work with
isometric games through screen reading and human-like input simulation.
The engine is designed to be game-agnostic, with game-specific implementations
built on top of these core components.
Main Components:
- screen: Screenshot capture, OCR, template matching
- input: Human-like mouse/keyboard input simulation
- vision: Computer vision utilities for object detection
- state: Game state management with event system
- navigation: Pathfinding and movement control
- safety: Anti-detection measures and timing randomization
"""
__version__ = "0.1.0"
__author__ = "Hoid"
from .screen.capture import ScreenCapture
from .input.humanize import HumanInput
from .state.manager import GameStateManager
from .vision.detector import ObjectDetector
from .navigation.pathfinder import Pathfinder
from .safety.timing import SafetyTimer
__all__ = [
"ScreenCapture",
"HumanInput",
"GameStateManager",
"ObjectDetector",
"Pathfinder",
"SafetyTimer",
]

23
engine/input/__init__.py Normal file
View file

@ -0,0 +1,23 @@
"""Human-like input simulation for mouse and keyboard interactions.
This module provides tools for generating realistic input patterns that mimic
human behavior, including natural mouse movement curves and timing variations.
Components:
- mouse: Human-like mouse movement with Bézier curves
- keyboard: Keyboard input with realistic timing patterns
- humanize: Central controller for randomized, human-like interactions
"""
from .mouse import MouseController, MousePath
from .keyboard import KeyboardController, KeySequence
from .humanize import HumanInput, InputConfig
__all__ = [
"MouseController",
"MousePath",
"KeyboardController",
"KeySequence",
"HumanInput",
"InputConfig",
]

112
engine/input/humanize.py Normal file
View file

@ -0,0 +1,112 @@
"""Human-like behavior patterns for input simulation.
Provides randomization utilities to make bot inputs appear natural,
including variable delays, mouse jitter, and activity scheduling.
"""
import random
import time
import logging
from typing import Tuple, Optional
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
@dataclass
class HumanProfile:
    """Defines a human behavior profile for input randomization.

    Consumed by Humanizer; every (min, max) pair here is sampled with
    random.uniform each time a value is needed.
    """
    # Reaction time range in seconds
    reaction_min: float = 0.15
    reaction_max: float = 0.45
    # Mouse movement speed range (pixels per second)
    mouse_speed_min: float = 400.0
    mouse_speed_max: float = 1200.0
    # Click position jitter in pixels (applied as a +/- offset on each axis)
    click_jitter: int = 3
    # Chance of double-reading (hesitation before action)
    hesitation_chance: float = 0.1
    # Extra delay range (seconds) added when hesitation triggers
    hesitation_duration: Tuple[float, float] = (0.3, 1.2)
    # Break scheduling: (min, max) interval between breaks / break lengths
    micro_break_interval: Tuple[int, int] = (120, 300)  # seconds
    micro_break_duration: Tuple[int, int] = (2, 8)  # seconds
    long_break_interval: Tuple[int, int] = (1800, 3600)  # seconds
    long_break_duration: Tuple[int, int] = (60, 300)  # seconds
class Humanizer:
    """Applies human-like randomization to bot actions.

    Tracks break schedules as absolute timestamps and adds reaction
    delays, click jitter, and a mild fatigue factor to actions.
    """

    def __init__(self, profile: Optional[HumanProfile] = None):
        """Initialize with a behavior profile (defaults to HumanProfile()).

        Args:
            profile: Behavior parameters, or None for the defaults.
        """
        self.profile = profile or HumanProfile()
        self._last_micro_break = time.time()
        self._last_long_break = time.time()
        # Absolute timestamps of the next scheduled breaks.
        self._next_micro_break = time.time() + self._schedule_break(self.profile.micro_break_interval)
        self._next_long_break = time.time() + self._schedule_break(self.profile.long_break_interval)
        self._action_count = 0

    def reaction_delay(self) -> float:
        """Generate a human-like reaction delay in seconds."""
        base = random.uniform(self.profile.reaction_min, self.profile.reaction_max)
        # Occasionally add hesitation
        if random.random() < self.profile.hesitation_chance:
            base += random.uniform(*self.profile.hesitation_duration)
        # Slight fatigue factor based on actions performed (caps at +30%)
        fatigue = min(self._action_count / 1000, 0.3)
        base *= (1 + fatigue * random.random())
        return base

    def jitter_position(self, x: int, y: int) -> Tuple[int, int]:
        """Add a small random offset to a click position."""
        jitter = self.profile.click_jitter
        return (
            x + random.randint(-jitter, jitter),
            y + random.randint(-jitter, jitter),
        )

    def mouse_speed(self) -> float:
        """Get a randomized mouse movement speed (pixels per second)."""
        return random.uniform(
            self.profile.mouse_speed_min,
            self.profile.mouse_speed_max,
        )

    def should_take_break(self) -> Optional[float]:
        """Check if it's time for a break. Returns break duration or None.

        Fix: _schedule_break now returns a *relative* interval. Previously
        it returned an absolute timestamp that was added to `now + duration`
        here, double-counting wall-clock time and pushing the next break
        decades into the future (breaks never fired a second time).
        """
        now = time.time()
        if now >= self._next_long_break:
            duration = random.uniform(*self.profile.long_break_duration)
            self._next_long_break = now + duration + self._schedule_break(
                self.profile.long_break_interval
            )
            logger.info(f"Long break: {duration:.0f}s")
            return duration
        if now >= self._next_micro_break:
            duration = random.uniform(*self.profile.micro_break_duration)
            self._next_micro_break = now + duration + self._schedule_break(
                self.profile.micro_break_interval
            )
            logger.debug(f"Micro break: {duration:.1f}s")
            return duration
        return None

    def wait(self) -> None:
        """Sleep for a human-like reaction delay and count the action."""
        delay = self.reaction_delay()
        time.sleep(delay)
        self._action_count += 1

    def _schedule_break(self, interval: Tuple[int, int]) -> float:
        """Return a random interval in seconds until the next break.

        Relative offset only; callers add it to a base timestamp.
        """
        return random.uniform(*interval)

368
engine/input/keyboard.py Normal file
View file

@ -0,0 +1,368 @@
"""Human-like keyboard input simulation with realistic timing.
Provides keyboard input with natural typing patterns, including
varied keystroke timing and realistic human typing characteristics.
"""
from typing import List, Dict, Optional, Union
from dataclasses import dataclass
import time
import random
import logging
import pyautogui
from pynput import keyboard
logger = logging.getLogger(__name__)
@dataclass
class KeySequence:
    """Represents a sequence of keys with timing information.

    Built by KeyboardController.generate_key_sequence.
    """
    # Individual keys/characters in typing order.
    keys: List[str]
    # Per-key delay in seconds (same length as keys).
    delays: List[float]
    # Sum of all delays, in seconds.
    total_duration: float
class TypingProfile:
    """Defines typing characteristics for human-like input."""

    def __init__(self, wpm: int = 60, accuracy: float = 0.95):
        """Initialize typing profile.

        Args:
            wpm: Words per minute typing speed
            accuracy: Typing accuracy (0.0 to 1.0)
        """
        self.wpm = wpm
        self.accuracy = accuracy
        # Derive the per-character delay from WPM (5 characters per "word").
        self.base_char_delay = 60.0 / (wpm * 5)
        # Clamp bounds and word-boundary pause used by the controller.
        self.min_delay = 0.02  # Minimum delay between keys
        self.max_delay = 0.5  # Maximum delay between keys
        self.word_pause = 0.1  # Additional pause after spaces
        # Letter combinations that tend to slow a typist down.
        self.difficult_sequences = {
            'th', 'ch', 'sh', 'qu', 'tion', 'ing', 'er', 'ed'
        }
        # Keys that typically take longer to reach.
        self.slow_keys = {
            'q', 'z', 'x', 'j', 'k', 'shift', 'ctrl', 'alt'
        }
class KeyboardController:
    """Controller for human-like keyboard input.

    Wraps pyautogui with timing derived from a TypingProfile so keystrokes
    arrive with realistic, randomized delays; tracks modifier state so held
    keys can always be released.
    """

    def __init__(self, typing_profile: Optional[TypingProfile] = None):
        """Initialize keyboard controller.

        Args:
            typing_profile: Typing characteristics, or None for default
        """
        self.profile = typing_profile or TypingProfile()
        # Modifier state, tracked so release_all_keys() can clean up.
        self.shift_held = False
        self.ctrl_held = False
        self.alt_held = False
        # Key mapping for special keys (name -> literal char or press name)
        self.key_mapping = {
            'enter': '\n',
            'return': '\n',
            'tab': '\t',
            'space': ' ',
            'backspace': 'backspace',
            'delete': 'delete',
            'escape': 'esc',
            'esc': 'esc',
        }

    def calculate_key_delay(self, key: str, previous_key: Optional[str] = None) -> float:
        """Calculate a realistic delay for typing a key.

        Args:
            key: Key to type
            previous_key: Previously typed key for sequence analysis

        Returns:
            Delay in seconds before typing this key, clamped to the
            profile's [min_delay, max_delay] range.
        """
        # Base delay from typing speed
        delay = self.profile.base_char_delay
        # Adjust for key difficulty
        if key.lower() in self.profile.slow_keys:
            delay *= random.uniform(1.2, 1.8)
        # Adjust for difficult sequences
        if previous_key:
            sequence = previous_key.lower() + key.lower()
            if any(seq in sequence for seq in self.profile.difficult_sequences):
                delay *= random.uniform(1.1, 1.5)
        # Add natural variation
        delay *= random.uniform(0.7, 1.4)
        # Extra pause after spaces (word boundaries)
        if previous_key and previous_key == ' ':
            delay += self.profile.word_pause * random.uniform(0.5, 1.5)
        # Clamp to reasonable bounds
        return max(self.profile.min_delay, min(self.profile.max_delay, delay))

    def type_text(self, text: str, include_errors: bool = False) -> None:
        """Type text with human-like timing and optional errors.

        Args:
            text: Text to type
            include_errors: Whether to include typing errors and corrections
        """
        if not text:
            return
        logger.debug(f"Typing text: '{text[:50]}{'...' if len(text) > 50 else ''}'")
        previous_key = None
        for i, char in enumerate(text):
            # Calculate delay before this character
            delay = self.calculate_key_delay(char, previous_key)
            # Sleep before typing
            time.sleep(delay)
            # Occasionally make typing errors if enabled
            if include_errors and self.should_make_error():
                self.make_typing_error(char)
            else:
                self.type_key(char)
            previous_key = char

    def should_make_error(self) -> bool:
        """Determine if a typing error should be made.

        Returns:
            True with probability (1 - profile.accuracy)
        """
        return random.random() > self.profile.accuracy

    def make_typing_error(self, intended_key: str) -> None:
        """Make a typing error and correct it (wrong key, pause, backspace).

        Args:
            intended_key: The key that was supposed to be typed
        """
        # Type wrong key (usually adjacent on keyboard)
        wrong_key = self.get_adjacent_key(intended_key)
        self.type_key(wrong_key)
        # Pause as human realizes mistake
        time.sleep(random.uniform(0.1, 0.4))
        # Backspace to correct
        self.type_key('backspace')
        time.sleep(random.uniform(0.05, 0.15))
        # Type correct key
        self.type_key(intended_key)

    def get_adjacent_key(self, key: str) -> str:
        """Get an adjacent key for typing errors.

        Args:
            key: Original key

        Returns:
            A key physically near `key` on a QWERTY layout ('abcd' fallback
            for characters not in the map).
        """
        # Simplified adjacent key mapping
        adjacent_map = {
            'a': 'sq', 'b': 'vgn', 'c': 'xvd', 'd': 'sfe', 'e': 'wrd',
            'f': 'dgr', 'g': 'fht', 'h': 'gyu', 'i': 'uko', 'j': 'hnu',
            'k': 'jmo', 'l': 'kpo', 'm': 'njk', 'n': 'bhm', 'o': 'ilp',
            'p': 'olo', 'q': 'wa', 'r': 'etf', 's': 'adw', 't': 'rgy',
            'u': 'yhi', 'v': 'cfg', 'w': 'qse', 'x': 'zdc', 'y': 'tgu',
            'z': 'xas'
        }
        adjacent_keys = adjacent_map.get(key.lower(), 'abcd')
        return random.choice(adjacent_keys)

    def type_key(self, key: str) -> None:
        """Type a single key.

        Single characters are written literally; named keys ('f1', 'shift',
        'num5', ...) are pressed so their names are not typed out as text.

        Args:
            key: Key to type
        """
        # Handle special keys
        if key.lower() in self.key_mapping:
            mapped_key = self.key_mapping[key.lower()]
            if mapped_key in ['backspace', 'delete', 'esc']:
                pyautogui.press(mapped_key)
            else:
                pyautogui.write(mapped_key)
        elif len(key) > 1:
            # Fix: unmapped multi-character names (e.g. 'num5' from
            # type_number_sequence, or 'f1') were previously passed to
            # pyautogui.write, which typed the name as literal text.
            pyautogui.press(key)
        else:
            pyautogui.write(key)

    def press_key_combination(self, *keys: str) -> None:
        """Press a combination of keys (e.g., Ctrl+C).

        Args:
            keys: Keys to press together
        """
        logger.debug(f"Pressing key combination: {'+'.join(keys)}")
        # Press all keys down
        for key in keys:
            pyautogui.keyDown(key)
            time.sleep(random.uniform(0.01, 0.03))
        # Hold briefly
        time.sleep(random.uniform(0.05, 0.1))
        # Release all keys (in reverse order)
        for key in reversed(keys):
            pyautogui.keyUp(key)
            time.sleep(random.uniform(0.01, 0.03))

    def press_key(self, key: str, duration: Optional[float] = None) -> None:
        """Press and release a key.

        Args:
            key: Key to press
            duration: How long to hold key, or None for quick press
        """
        if duration is None:
            pyautogui.press(key)
        else:
            pyautogui.keyDown(key)
            time.sleep(duration)
            pyautogui.keyUp(key)

    def hold_key(self, key: str) -> None:
        """Start holding a key down and record modifier state.

        Args:
            key: Key to hold
        """
        pyautogui.keyDown(key)
        # Track modifier keys
        if key.lower() == 'shift':
            self.shift_held = True
        elif key.lower() in ['ctrl', 'control']:
            self.ctrl_held = True
        elif key.lower() == 'alt':
            self.alt_held = True

    def release_key(self, key: str) -> None:
        """Stop holding a key and record modifier state.

        Args:
            key: Key to release
        """
        pyautogui.keyUp(key)
        # Track modifier keys
        if key.lower() == 'shift':
            self.shift_held = False
        elif key.lower() in ['ctrl', 'control']:
            self.ctrl_held = False
        elif key.lower() == 'alt':
            self.alt_held = False

    def release_all_keys(self) -> None:
        """Release all held modifier keys."""
        if self.shift_held:
            self.release_key('shift')
        if self.ctrl_held:
            self.release_key('ctrl')
        if self.alt_held:
            self.release_key('alt')

    def type_number_sequence(self, numbers: Union[str, int],
                             use_numpad: bool = False) -> None:
        """Type a sequence of numbers.

        Args:
            numbers: Numbers to type
            use_numpad: Whether to use numpad keys ('num0'..'num9')
        """
        number_str = str(numbers)
        for digit in number_str:
            if digit.isdigit():
                if use_numpad:
                    key = f'num{digit}'
                else:
                    key = digit
                self.type_key(key)
                time.sleep(self.calculate_key_delay(digit))

    def simulate_pause(self, pause_type: str = 'thinking') -> None:
        """Simulate natural pauses in typing.

        Args:
            pause_type: Type of pause ('thinking', 'reading', 'short')
        """
        if pause_type == 'thinking':
            duration = random.uniform(0.5, 2.0)
        elif pause_type == 'reading':
            duration = random.uniform(0.2, 0.8)
        else:  # short
            duration = random.uniform(0.1, 0.3)
        logger.debug(f"Simulating {pause_type} pause for {duration:.2f}s")
        time.sleep(duration)

    def generate_key_sequence(self, text: str) -> KeySequence:
        """Generate a key sequence with timing for given text (no typing).

        Args:
            text: Text to generate sequence for

        Returns:
            KeySequence with keys and delays; total_duration == sum(delays)
        """
        keys = list(text)
        delays = []
        total_duration = 0.0
        previous_key = None
        for key in keys:
            delay = self.calculate_key_delay(key, previous_key)
            delays.append(delay)
            total_duration += delay
            previous_key = key
        return KeySequence(keys, delays, total_duration)

    def set_typing_speed(self, wpm: int) -> None:
        """Set typing speed, clamped to 10-200 WPM.

        Args:
            wpm: Words per minute
        """
        self.profile.wpm = max(10, min(200, wpm))
        chars_per_minute = self.profile.wpm * 5
        self.profile.base_char_delay = 60.0 / chars_per_minute
        logger.info(f"Typing speed set to {self.profile.wpm} WPM")

    def set_accuracy(self, accuracy: float) -> None:
        """Set typing accuracy, clamped to [0.0, 1.0].

        Args:
            accuracy: Accuracy from 0.0 to 1.0
        """
        self.profile.accuracy = max(0.0, min(1.0, accuracy))
        logger.info(f"Typing accuracy set to {self.profile.accuracy * 100:.1f}%")

345
engine/input/mouse.py Normal file
View file

@ -0,0 +1,345 @@
"""Human-like mouse movement and clicking with Bézier curves.
Provides realistic mouse movement patterns using Bézier curves with
randomized control points and natural acceleration/deceleration.
"""
from typing import Tuple, List, Optional, Callable
from dataclasses import dataclass
import time
import math
import random
import logging
import pyautogui
import numpy as np
logger = logging.getLogger(__name__)
# Disable pyautogui failsafe for production use
pyautogui.FAILSAFE = False
@dataclass
class MousePath:
    """Represents a mouse movement path with timing.

    Produced by MouseController.generate_movement_path and stepped through
    by move_to/drag, which sleep delays[i] between consecutive points.
    """
    # Successive on-screen (x, y) positions along the curve.
    points: List[Tuple[int, int]]
    # Sleep time (seconds) between consecutive points; len == len(points) - 1.
    delays: List[float]
    # Nominal duration of the whole movement in seconds.
    total_duration: float
class BezierCurve:
    """Bézier curve generation for natural mouse movement."""

    @staticmethod
    def cubic_bezier(t: float, p0: Tuple[float, float], p1: Tuple[float, float],
                     p2: Tuple[float, float], p3: Tuple[float, float]) -> Tuple[float, float]:
        """Evaluate a cubic Bézier curve at parameter t.

        Args:
            t: Parameter from 0 to 1
            p0: Start point
            p1: First control point
            p2: Second control point
            p3: End point

        Returns:
            (x, y) point on curve
        """
        u = 1 - t
        # Bernstein basis weights for the four control points.
        w0 = u ** 3
        w1 = 3 * u ** 2 * t
        w2 = 3 * u * t ** 2
        w3 = t ** 3
        x = w0 * p0[0] + w1 * p1[0] + w2 * p2[0] + w3 * p3[0]
        y = w0 * p0[1] + w1 * p1[1] + w2 * p2[1] + w3 * p3[1]
        return (x, y)

    @staticmethod
    def generate_control_points(start: Tuple[int, int], end: Tuple[int, int],
                                randomness: float = 0.3) -> Tuple[Tuple[float, float], Tuple[float, float]]:
        """Generate randomized control points for a natural-looking curve.

        Args:
            start: Starting position
            end: Ending position
            randomness: Amount of randomness (0.0 to 1.0)

        Returns:
            Tuple of two control points
        """
        delta_x = end[0] - start[0]
        delta_y = end[1] - start[1]
        span = math.sqrt(delta_x * delta_x + delta_y * delta_y)
        # How far the control points may stray from the straight line.
        stray = span * randomness * random.uniform(0.2, 0.8)
        theta_a = random.uniform(-math.pi, math.pi)
        theta_b = random.uniform(-math.pi, math.pi)
        # Control points sit 1/4 and 3/4 of the way along, pushed sideways.
        first = (start[0] + delta_x * 0.25 + math.cos(theta_a) * stray,
                 start[1] + delta_y * 0.25 + math.sin(theta_a) * stray)
        second = (start[0] + delta_x * 0.75 + math.cos(theta_b) * stray,
                  start[1] + delta_y * 0.75 + math.sin(theta_b) * stray)
        return (first, second)
class MouseController:
    """Controller for human-like mouse interactions.

    Movement follows randomized Bézier paths with Fitts'-law-inspired
    durations; clicks get small positional jitter and pre/post delays.
    """

    def __init__(self):
        """Initialize mouse controller from the current cursor position."""
        self.current_pos = pyautogui.position()
        self.movement_speed = 1.0  # Multiplier for movement speed
        self.click_variance = 3  # Pixel variance for click positions
        # Movement timing parameters
        self.min_duration = 0.1  # Minimum movement time
        self.max_duration = 1.5  # Maximum movement time
        self.base_speed = 1000  # Base pixels per second

    def get_current_position(self) -> Tuple[int, int]:
        """Get current mouse position (refreshed from pyautogui).

        Returns:
            (x, y) tuple of current position
        """
        self.current_pos = pyautogui.position()
        return self.current_pos

    def calculate_movement_duration(self, start: Tuple[int, int],
                                    end: Tuple[int, int]) -> float:
        """Calculate realistic movement duration based on distance.

        Args:
            start: Starting position
            end: Ending position

        Returns:
            Movement duration in seconds, clamped to
            [min_duration, max_duration].
        """
        dx = end[0] - start[0]
        dy = end[1] - start[1]
        distance = math.sqrt(dx*dx + dy*dy)
        # Fitts' Law inspired calculation:
        # time grows linearly with distance plus a logarithmic term.
        base_time = distance / (self.base_speed * self.movement_speed)
        fitts_factor = math.log2(1 + distance / 10) / 10
        duration = base_time + fitts_factor
        # Add some randomness
        duration *= random.uniform(0.8, 1.2)
        # Clamp to reasonable bounds
        return max(self.min_duration, min(self.max_duration, duration))

    def generate_movement_path(self, start: Tuple[int, int], end: Tuple[int, int],
                               duration: Optional[float] = None,
                               steps: Optional[int] = None) -> MousePath:
        """Generate a Bézier curve path for mouse movement.

        Args:
            start: Starting position
            end: Ending position
            duration: Movement duration, or None to calculate
            steps: Number of steps, or None to calculate

        Returns:
            MousePath with points and timing; total_duration equals the
            sum of the per-step delays.
        """
        if duration is None:
            duration = self.calculate_movement_duration(start, end)
        if steps is None:
            # Calculate steps based on distance and duration
            distance = math.sqrt((end[0] - start[0])**2 + (end[1] - start[1])**2)
            steps = max(10, int(distance / 10))  # Roughly 10 pixels per step
        # Generate control points
        cp1, cp2 = BezierCurve.generate_control_points(start, end)
        # Generate path points
        points = []
        delays = []
        for i in range(steps + 1):
            t = i / steps
            # Use ease-in-out curve for timing
            timing_t = self._ease_in_out(t)
            # Calculate position on Bézier curve
            x, y = BezierCurve.cubic_bezier(timing_t, start, cp1, cp2, end)
            points.append((int(x), int(y)))
            # Calculate delay for this step
            if i < steps:
                delay = duration / steps
                # Add small random variation
                delay *= random.uniform(0.8, 1.2)
                delays.append(delay)
        # Fix: per-step delays are randomized, so report the duration the
        # path will actually take instead of the nominal `duration`, which
        # drifted from sum(delays).
        return MousePath(points, delays, sum(delays))

    def move_to(self, target: Tuple[int, int], duration: Optional[float] = None) -> None:
        """Move mouse to target position using a Bézier curve.

        Args:
            target: Target (x, y) position
            duration: Movement duration, or None to calculate
        """
        start = self.get_current_position()
        path = self.generate_movement_path(start, target, duration)
        logger.debug(f"Moving mouse from {start} to {target} in {path.total_duration:.2f}s")
        for i, point in enumerate(path.points[1:], 1):
            pyautogui.moveTo(point[0], point[1], duration=0)
            if i <= len(path.delays):
                time.sleep(path.delays[i-1])
        self.current_pos = target

    def click(self, position: Optional[Tuple[int, int]] = None,
              button: str = 'left', move_first: bool = True) -> None:
        """Click at specified position with human-like variation.

        Args:
            position: Click position, or None for current position
            button: Mouse button ('left', 'right', 'middle')
            move_first: Whether to move to position first
        """
        if position is None:
            position = self.get_current_position()
        else:
            # Add small random offset for more human-like clicking
            offset_x = random.randint(-self.click_variance, self.click_variance)
            offset_y = random.randint(-self.click_variance, self.click_variance)
            position = (position[0] + offset_x, position[1] + offset_y)
        if move_first and position != self.get_current_position():
            self.move_to(position)
        # Random pre-click delay
        time.sleep(random.uniform(0.01, 0.05))
        logger.debug(f"Clicking {button} button at {position}")
        pyautogui.click(position[0], position[1], button=button)
        # Random post-click delay
        time.sleep(random.uniform(0.01, 0.08))

    def double_click(self, position: Optional[Tuple[int, int]] = None,
                     move_first: bool = True) -> None:
        """Double-click at specified position (no jitter applied).

        Args:
            position: Click position, or None for current position
            move_first: Whether to move to position first
        """
        if position is None:
            position = self.get_current_position()
        if move_first and position != self.get_current_position():
            self.move_to(position)
        # Random delay before double-click
        time.sleep(random.uniform(0.01, 0.05))
        logger.debug(f"Double-clicking at {position}")
        pyautogui.doubleClick(position[0], position[1])
        # Random delay after double-click
        time.sleep(random.uniform(0.05, 0.1))

    def drag(self, start: Tuple[int, int], end: Tuple[int, int],
             button: str = 'left', duration: Optional[float] = None) -> None:
        """Drag from start to end position.

        Args:
            start: Starting position
            end: Ending position
            button: Mouse button to drag with
            duration: Drag duration, or None to calculate
        """
        # Move to start position
        self.move_to(start)
        # Mouse down
        time.sleep(random.uniform(0.01, 0.03))
        pyautogui.mouseDown(start[0], start[1], button=button)
        # Wait briefly before starting drag
        time.sleep(random.uniform(0.05, 0.1))
        # Generate drag path
        path = self.generate_movement_path(start, end, duration)
        logger.debug(f"Dragging from {start} to {end}")
        # Execute drag movement
        for i, point in enumerate(path.points[1:], 1):
            pyautogui.moveTo(point[0], point[1], duration=0)
            if i <= len(path.delays):
                time.sleep(path.delays[i-1])
        # Mouse up
        time.sleep(random.uniform(0.01, 0.03))
        pyautogui.mouseUp(end[0], end[1], button=button)
        self.current_pos = end

    def scroll(self, clicks: int, position: Optional[Tuple[int, int]] = None) -> None:
        """Scroll at specified position.

        Args:
            clicks: Number of scroll clicks (positive = up, negative = down)
            position: Scroll position, or None for current position
        """
        if position is not None and position != self.get_current_position():
            self.move_to(position)
        # Random delay before scrolling
        time.sleep(random.uniform(0.05, 0.15))
        # Scroll with small delays between clicks for more human-like behavior
        for i in range(abs(clicks)):
            scroll_direction = 1 if clicks > 0 else -1
            pyautogui.scroll(scroll_direction)
            if i < abs(clicks) - 1:  # Don't delay after last scroll
                time.sleep(random.uniform(0.02, 0.08))

    def _ease_in_out(self, t: float) -> float:
        """Smoothstep easing for acceleration/deceleration.

        Args:
            t: Input parameter (0 to 1)

        Returns:
            Eased parameter (0 to 1)
        """
        return t * t * (3.0 - 2.0 * t)

    def set_movement_speed(self, speed: float) -> None:
        """Set movement speed multiplier, clamped to [0.1, 5.0].

        Args:
            speed: Speed multiplier (1.0 = normal, 2.0 = double speed, etc.)
        """
        self.movement_speed = max(0.1, min(5.0, speed))
        logger.info(f"Mouse movement speed set to {self.movement_speed}x")

    def set_click_variance(self, variance: int) -> None:
        """Set click position variance in pixels, clamped to [0, 10].

        Args:
            variance: Maximum pixel offset for clicks
        """
        self.click_variance = max(0, min(10, variance))
        logger.info(f"Click variance set to {self.click_variance} pixels")

View file

View file

@ -0,0 +1,46 @@
"""Character movement control for isometric games.
Handles click-to-move navigation with human-like patterns.
"""
from typing import Tuple, Optional
import logging
import time
import numpy as np
from engine.input.mouse import MouseController
from engine.input.humanize import Humanizer
from engine.navigation.pathfinder import Waypoint, WaypointGraph
logger = logging.getLogger(__name__)
class MovementController:
    """Controls character movement via click-to-move.

    Translates target screen coordinates into humanized mouse actions and
    walks waypoint paths found in the navigation graph.
    """

    def __init__(self, mouse: MouseController, humanizer: Humanizer):
        """Args:
            mouse: Low-level mouse controller used to move and click.
            humanizer: Source of click jitter and human-like delays.
        """
        self.mouse = mouse
        self.humanizer = humanizer
        self.waypoints = WaypointGraph()

    def click_to_move(self, x: int, y: int) -> None:
        """Click a screen position to move there."""
        jx, jy = self.humanizer.jitter_position(x, y)
        # Fix: MouseController.move_to takes a single (x, y) tuple; passing
        # two ints made jx the target (TypeError on indexing) and jy the
        # movement duration.
        self.mouse.move_to((jx, jy))
        self.humanizer.wait()
        self.mouse.click()

    def navigate_waypoints(self, start: str, goal: str) -> bool:
        """Navigate between named waypoints.

        Returns:
            True if a path existed and was walked, False otherwise.
        """
        path = self.waypoints.find_path(start, goal)
        if not path:
            logger.warning(f"No path from {start} to {goal}")
            return False
        for waypoint in path[1:]:  # Skip start
            self.click_to_move(waypoint.screen_x, waypoint.screen_y)
            # Wait for movement (game-specific timing)
            time.sleep(self.humanizer.reaction_delay() + 0.5)
        return True

View file

@ -0,0 +1,78 @@
"""Pathfinding for isometric game navigation.
Implements A* and click-to-move navigation for isometric games
where the bot needs to move between known locations.
"""
from typing import List, Tuple, Optional, Dict
from dataclasses import dataclass
import heapq
import math
import logging
logger = logging.getLogger(__name__)
@dataclass
class Waypoint:
    """A named location in the game world."""
    # Unique name used as the node key in WaypointGraph.
    name: str
    # On-screen click coordinates for this location.
    screen_x: int
    screen_y: int
    # Optional extra data; annotation fixed to Optional since the default
    # is None (a mutable {} default is not allowed on a dataclass field).
    # Callers must handle metadata being None.
    metadata: Optional[Dict] = None
class WaypointGraph:
    """Graph of connected waypoints for navigation."""

    def __init__(self):
        # Node name -> Waypoint, and node name -> neighbor names.
        self._waypoints: Dict[str, Waypoint] = {}
        self._edges: Dict[str, List[str]] = {}

    def add_waypoint(self, waypoint: Waypoint) -> None:
        """Register a waypoint node with an (initially empty) adjacency list."""
        name = waypoint.name
        self._waypoints[name] = waypoint
        self._edges.setdefault(name, [])

    def connect(self, name_a: str, name_b: str, bidirectional: bool = True) -> None:
        """Add an edge from name_a to name_b (and the reverse by default)."""
        self._edges.setdefault(name_a, []).append(name_b)
        if bidirectional:
            self._edges.setdefault(name_b, []).append(name_a)

    def find_path(self, start: str, goal: str) -> Optional[List[Waypoint]]:
        """A* pathfinding between waypoints.

        Uses straight-line screen distance both as edge cost and as the
        heuristic. Returns the waypoint list from start to goal, or None
        if either node is unknown or no path exists.
        """
        if start not in self._waypoints or goal not in self._waypoints:
            return None
        target = self._waypoints[goal]

        def h(name: str) -> float:
            node = self._waypoints[name]
            return math.hypot(node.screen_x - target.screen_x,
                              node.screen_y - target.screen_y)

        # Heap entries are (f, g, name) so ties break on cheaper g first.
        frontier = [(h(start), 0, start)]
        parents: Dict[str, str] = {}
        best_g: Dict[str, float] = {start: 0}
        while frontier:
            _, _, current = heapq.heappop(frontier)
            if current == goal:
                # Walk the parent chain back to start, then reverse it.
                chain = [self._waypoints[current]]
                while current in parents:
                    current = parents[current]
                    chain.append(self._waypoints[current])
                chain.reverse()
                return chain
            cur_node = self._waypoints[current]
            for nxt in self._edges.get(current, []):
                nxt_node = self._waypoints[nxt]
                step = math.hypot(nxt_node.screen_x - cur_node.screen_x,
                                  nxt_node.screen_y - cur_node.screen_y)
                candidate = best_g[current] + step
                if candidate < best_g.get(nxt, float('inf')):
                    parents[nxt] = current
                    best_g[nxt] = candidate
                    heapq.heappush(frontier, (candidate + h(nxt), candidate, nxt))
        return None

View file

73
engine/safety/patterns.py Normal file
View file

@ -0,0 +1,73 @@
"""Behavioral pattern randomization for anti-detection.
Varies bot behavior to avoid detectable patterns like identical
farming routes, consistent timing, or perfect execution.
"""
import random
import logging
from typing import List, Tuple, Callable, Any
logger = logging.getLogger(__name__)
class RouteRandomizer:
    """Randomizes farming routes and action sequences."""

    def __init__(self, variation_factor: float = 0.15):
        # Fraction by which a waypoint coordinate may drift.
        self.variation_factor = variation_factor

    def shuffle_optional_steps(
        self, steps: List[Any], required_indices: List[int] = None,
    ) -> List[Any]:
        """Shuffle non-required steps while keeping required ones in order."""
        pinned = set(required_indices or [])
        movable = [step for idx, step in enumerate(steps) if idx not in pinned]
        random.shuffle(movable)
        feed = iter(movable)
        # Required steps keep their slots; shuffled steps fill the rest.
        return [
            steps[idx] if idx in pinned else next(feed)
            for idx in range(len(steps))
        ]

    def vary_route(
        self, waypoints: List[Tuple[int, int]],
    ) -> List[Tuple[int, int]]:
        """Add slight variations to a route's waypoints.

        Each coordinate is nudged by up to variation_factor of its own value.
        """
        factor = self.variation_factor
        return [
            (
                x + int(x * factor * random.uniform(-1, 1)),
                y + int(y * factor * random.uniform(-1, 1)),
            )
            for x, y in waypoints
        ]

    def should_skip_optional(self, skip_chance: float = 0.1) -> bool:
        """Randomly decide to skip an optional action."""
        return random.random() < skip_chance
class ActionVariator:
    """Varies how actions are performed."""

    @staticmethod
    def vary_count(target: int, variance: int = 1) -> int:
        """Vary a repeat count (e.g., click 2-4 times instead of always 3).

        Never returns less than 1.
        """
        delta = random.randint(-variance, variance)
        return max(1, target + delta)

    @staticmethod
    def random_order(actions: List[Callable]) -> List[Callable]:
        """Return the actions in a random order without mutating the input."""
        reordered = list(actions)
        random.shuffle(reordered)
        return reordered

68
engine/safety/timing.py Normal file
View file

@ -0,0 +1,68 @@
"""Anti-detection timing and break scheduling.
Manages play sessions with realistic timing patterns to avoid
behavioral detection systems.
"""
import random
import time
import logging
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)
@dataclass
class SessionSchedule:
    """Defines a play session schedule.

    All (min, max) pairs are sampled uniformly by SessionTimer.
    """
    # Length of a single continuous session, in hours.
    min_session_hours: float = 1.0
    max_session_hours: float = 4.0
    # Pause between sessions, in minutes.
    min_break_minutes: float = 10.0
    max_break_minutes: float = 45.0
    # Hard cap on total playtime per day, in hours.
    max_daily_hours: float = 12.0
class SessionTimer:
    """Manages bot session timing to mimic human play patterns."""

    def __init__(self, schedule: Optional[SessionSchedule] = None):
        """Initialize with a schedule (defaults to SessionSchedule()).

        Args:
            schedule: Session timing parameters, or None for defaults.
        """
        self.schedule = schedule or SessionSchedule()
        self._session_start = time.time()
        # Seconds of completed-session playtime accumulated today.
        self._daily_playtime = 0.0
        self._day_start = time.time()
        self._target_duration = self._roll_session_duration()

    def _roll_session_duration(self) -> float:
        """Generate random session duration in seconds."""
        hours = random.uniform(
            self.schedule.min_session_hours,
            self.schedule.max_session_hours,
        )
        return hours * 3600

    def _roll_over_day(self) -> None:
        """Reset the daily playtime counter once 24h have elapsed.

        Fix: _day_start was recorded but never used, so hitting the daily
        cap locked the bot out permanently instead of only for the rest of
        the day.
        """
        now = time.time()
        if now - self._day_start >= 86400.0:
            self._day_start = now
            self._daily_playtime = 0.0

    def session_elapsed(self) -> float:
        """Seconds elapsed in current session."""
        return time.time() - self._session_start

    def should_stop_session(self) -> bool:
        """Check if current session should end (target reached or daily cap)."""
        self._roll_over_day()
        if self.session_elapsed() >= self._target_duration:
            return True
        if self._daily_playtime + self.session_elapsed() >= self.schedule.max_daily_hours * 3600:
            return True
        return False

    def get_break_duration(self) -> float:
        """Get randomized break duration in seconds."""
        return random.uniform(
            self.schedule.min_break_minutes * 60,
            self.schedule.max_break_minutes * 60,
        )

    def start_new_session(self) -> None:
        """Start a new play session after a break."""
        self._daily_playtime += self.session_elapsed()
        self._roll_over_day()
        self._session_start = time.time()
        self._target_duration = self._roll_session_duration()
        logger.info(f"New session: {self._target_duration/3600:.1f}h target")

23
engine/screen/__init__.py Normal file
View file

@ -0,0 +1,23 @@
"""Screen reading components for visual game state detection.
This module provides tools for capturing, analyzing, and extracting information
from game screenshots without requiring memory access or game modification.
Components:
- capture: Screenshot capture using various backends
- ocr: Optical Character Recognition for text extraction
- template: Template matching for UI element detection
"""
from .capture import ScreenCapture, ScreenRegion
from .ocr import OCREngine, TextDetector
from .template import TemplateManager, TemplateMatcher
# Public API of the screen package; governs `from engine.screen import *`.
__all__ = [
    "ScreenCapture",
    "ScreenRegion",
    "OCREngine",
    "TextDetector",
    "TemplateManager",
    "TemplateMatcher",
]

220
engine/screen/capture.py Normal file
View file

@ -0,0 +1,220 @@
"""Screen capture utilities for taking game screenshots.
Provides efficient screenshot capture using multiple backends (mss, PIL)
with support for specific regions and window targeting.
"""
from typing import Tuple, Optional, Dict, Any
from dataclasses import dataclass
import time
import logging
import numpy as np
from PIL import Image, ImageGrab
import mss
import cv2
logger = logging.getLogger(__name__)
@dataclass
class ScreenRegion:
    """Axis-aligned rectangle describing a screen area to capture."""
    x: int       # left edge, pixels
    y: int       # top edge, pixels
    width: int   # horizontal extent, pixels
    height: int  # vertical extent, pixels

    @property
    def bounds(self) -> Tuple[int, int, int, int]:
        """Rectangle as a PIL-style (left, top, right, bottom) tuple."""
        left, top = self.x, self.y
        return (left, top, left + self.width, top + self.height)

    @property
    def mss_bounds(self) -> Dict[str, int]:
        """Rectangle as the mapping consumed by mss' grab()."""
        return dict(top=self.y, left=self.x, width=self.width, height=self.height)
class ScreenCapture:
    """High-performance screen capture with multiple backends.

    Prefers mss for speed; silently degrades to PIL's ImageGrab when mss
    fails to initialize. All captures are returned as BGR numpy arrays.
    """
    def __init__(self, backend: str = "mss", monitor: int = 1):
        """Initialize screen capture.
        Args:
            backend: Capture backend ("mss" or "pil")
            monitor: Monitor number to capture from (1-indexed)
        """
        self.backend = backend
        self.monitor = monitor
        # Populated by _initialize_mss(); stay None for the PIL backend.
        self._mss_instance: Optional[mss.mss] = None
        self._monitor_info: Optional[Dict[str, int]] = None
        if backend == "mss":
            self._initialize_mss()
    def _initialize_mss(self) -> None:
        """Initialize MSS backend; falls back to PIL on any failure."""
        try:
            self._mss_instance = mss.mss()
            monitors = self._mss_instance.monitors
            # mss monitors[0] is the combined virtual screen; physical
            # monitors are 1-indexed, matching self.monitor.
            if self.monitor >= len(monitors):
                logger.warning(f"Monitor {self.monitor} not found, using primary")
                self.monitor = 1
            self._monitor_info = monitors[self.monitor]
            logger.info(f"Initialized MSS capture for monitor {self.monitor}: "
                        f"{self._monitor_info['width']}x{self._monitor_info['height']}")
        except Exception as e:
            logger.error(f"Failed to initialize MSS: {e}")
            # Degrade gracefully instead of crashing the bot at startup.
            self.backend = "pil"
    def capture_screen(self, region: Optional[ScreenRegion] = None) -> np.ndarray:
        """Capture screenshot of screen or region.
        Args:
            region: Specific region to capture, or None for full screen
        Returns:
            Screenshot as numpy array in BGR format (for OpenCV compatibility)
        """
        try:
            if self.backend == "mss":
                return self._capture_mss(region)
            else:
                return self._capture_pil(region)
        except Exception as e:
            logger.error(f"Screen capture failed: {e}")
            # Fallback to an empty (black) image so callers never get None.
            return np.zeros((100, 100, 3), dtype=np.uint8)
    def _capture_mss(self, region: Optional[ScreenRegion]) -> np.ndarray:
        """Capture using MSS backend."""
        if not self._mss_instance:
            raise RuntimeError("MSS not initialized")
        if region:
            monitor = region.mss_bounds
        else:
            monitor = self._monitor_info or self._mss_instance.monitors[self.monitor]
        # ScreenShot.rgb exposes the grab as packed RGB bytes (mss captures
        # natively in BGRA and converts for this property).
        screenshot = self._mss_instance.grab(monitor)
        img_array = np.frombuffer(screenshot.rgb, dtype=np.uint8)
        img_array = img_array.reshape((screenshot.height, screenshot.width, 3))
        # Convert RGB to BGR for OpenCV
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    def _capture_pil(self, region: Optional[ScreenRegion]) -> np.ndarray:
        """Capture using PIL backend."""
        if region:
            bbox = region.bounds
        else:
            bbox = None
        # PIL returns RGB format
        screenshot = ImageGrab.grab(bbox=bbox)
        img_array = np.array(screenshot)
        # Convert RGB to BGR for OpenCV
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    def save_screenshot(self, filename: str, region: Optional[ScreenRegion] = None) -> bool:
        """Save screenshot to file.
        Args:
            filename: Output filename
            region: Region to capture, or None for full screen
        Returns:
            True if successful, False otherwise
        """
        try:
            img = self.capture_screen(region)
            # cv2.imwrite expects BGR, which capture_screen already provides.
            return cv2.imwrite(filename, img)
        except Exception as e:
            logger.error(f"Failed to save screenshot: {e}")
            return False
    def get_screen_size(self) -> Tuple[int, int]:
        """Get screen dimensions.
        Returns:
            (width, height) tuple
        """
        if self.backend == "mss" and self._monitor_info:
            return (self._monitor_info["width"], self._monitor_info["height"])
        else:
            # Use PIL as fallback (takes a throwaway full-screen grab).
            screenshot = ImageGrab.grab()
            return screenshot.size
    def find_window(self, window_title: str) -> Optional[ScreenRegion]:
        """Find window by title and return its region.
        Args:
            window_title: Partial or full window title to search for
        Returns:
            ScreenRegion if window found, None otherwise
        Note:
            This is a placeholder - actual implementation would use
            platform-specific window enumeration (e.g., Windows API, X11)
        """
        # TODO: Implement window finding
        logger.warning("Window finding not implemented yet")
        return None
    def benchmark_capture(self, iterations: int = 100) -> Dict[str, float]:
        """Benchmark capture performance.
        Args:
            iterations: Number of captures to perform
        Returns:
            Performance statistics
            # NOTE(review): dict also carries the backend name (a str),
            # so the Dict[str, float] annotation is looser in practice.
        """
        logger.info(f"Benchmarking {self.backend} backend ({iterations} iterations)")
        start_time = time.perf_counter()
        for _ in range(iterations):
            self.capture_screen()
        end_time = time.perf_counter()
        total_time = end_time - start_time
        avg_time = total_time / iterations
        fps = iterations / total_time
        stats = {
            "backend": self.backend,
            "iterations": iterations,
            "total_time": total_time,
            "avg_time_ms": avg_time * 1000,
            "fps": fps,
        }
        logger.info(f"Benchmark results: {avg_time*1000:.2f}ms avg, {fps:.1f} FPS")
        return stats
    def __enter__(self):
        """Context manager entry."""
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: release the mss screen handle if open."""
        if self._mss_instance:
            self._mss_instance.close()

346
engine/screen/ocr.py Normal file
View file

@ -0,0 +1,346 @@
"""OCR (Optical Character Recognition) for extracting text from screenshots.
Provides text detection and extraction capabilities using pytesseract
with preprocessing for better accuracy in game environments.
"""
from typing import List, Dict, Optional, Tuple, NamedTuple
import logging
import re
import cv2
import numpy as np
import pytesseract
from PIL import Image
logger = logging.getLogger(__name__)
class TextMatch(NamedTuple):
    """Represents detected text with position and confidence."""
    text: str  # recognized string, whitespace-stripped
    confidence: float  # tesseract word confidence (typically 0-100 scale)
    bbox: Tuple[int, int, int, int]  # (x, y, width, height) in image pixels
class OCRConfig:
    """Tunable settings for OCR extraction and image preprocessing."""

    def __init__(self):
        # --- Tesseract engine options ---
        self.tesseract_config = "--oem 3 --psm 6"  # default engine, uniform text block
        self.language = "eng"
        self.min_confidence = 30.0  # drop words tesseract scores below this

        # --- Preprocessing pipeline toggles ---
        self.preprocess = True
        self.scale_factor = 2.0  # upscale before OCR to help with small game fonts
        self.denoise = True
        self.contrast_enhance = True  # CLAHE contrast equalization

        # --- Post-OCR text filtering ---
        self.min_text_length = 1
        # Whitelist patterns: text must match at least one to be kept
        # (alphanumeric plus common punctuation).
        self.filter_patterns = [r'^[a-zA-Z0-9\s\-_:.,/]+$']
class OCREngine:
    """OCR engine for text extraction from game screenshots.

    Wraps pytesseract with an optional preprocessing pipeline (grayscale,
    upscale, denoise, CLAHE) tuned via OCRConfig.
    """
    def __init__(self, config: Optional[OCRConfig] = None):
        """Initialize OCR engine.
        Args:
            config: OCR configuration, or None for defaults
        Raises:
            RuntimeError: if the tesseract binary is unavailable.
        """
        self.config = config or OCRConfig()
        self._verify_tesseract()
    def _verify_tesseract(self) -> None:
        """Verify tesseract installation."""
        try:
            pytesseract.get_tesseract_version()
            logger.info("Tesseract initialized successfully")
        except Exception as e:
            logger.error(f"Tesseract not found or not working: {e}")
            # Fail fast: nothing in this class works without the binary.
            raise RuntimeError("Tesseract OCR is required but not available")
    def extract_text(self, image: np.ndarray, region: Optional[Tuple[int, int, int, int]] = None) -> str:
        """Extract all text from image.
        Args:
            image: Input image as numpy array (BGR)
            region: Optional (x, y, width, height) region to process
        Returns:
            Extracted text as string (empty string on OCR failure)
        """
        processed_img = self._preprocess_image(image, region)
        try:
            text = pytesseract.image_to_string(
                processed_img,
                lang=self.config.language,
                config=self.config.tesseract_config
            )
            return self._clean_text(text)
        except Exception as e:
            logger.error(f"OCR extraction failed: {e}")
            return ""
    def find_text(self, image: np.ndarray, search_text: str,
                  case_sensitive: bool = False) -> List[TextMatch]:
        """Find specific text in image with positions.
        Args:
            image: Input image as numpy array
            search_text: Text to search for (substring match per OCR word)
            case_sensitive: Whether search should be case sensitive
        Returns:
            List of TextMatch objects for found text
            # NOTE(review): bboxes are in *preprocessed* (possibly scaled)
            # coordinates when scale_factor > 1 — confirm before clicking.
        """
        processed_img = self._preprocess_image(image)
        try:
            # Get detailed OCR data
            data = pytesseract.image_to_data(
                processed_img,
                lang=self.config.language,
                config=self.config.tesseract_config,
                output_type=pytesseract.Output.DICT
            )
            matches = []
            # NOTE(review): despite the name, this holds the original-case
            # needle when case_sensitive is True.
            search_lower = search_text.lower() if not case_sensitive else search_text
            for i in range(len(data['text'])):
                text = data['text'][i].strip()
                confidence = float(data['conf'][i])
                # Low-confidence rows (including tesseract's -1 structural
                # rows, presumably) are skipped here.
                if confidence < self.config.min_confidence:
                    continue
                text_to_match = text.lower() if not case_sensitive else text
                if search_lower in text_to_match:
                    bbox = (
                        data['left'][i],
                        data['top'][i],
                        data['width'][i],
                        data['height'][i]
                    )
                    matches.append(TextMatch(text, confidence, bbox))
            return matches
        except Exception as e:
            logger.error(f"Text search failed: {e}")
            return []
    def get_text_regions(self, image: np.ndarray) -> List[TextMatch]:
        """Get all text regions with positions and confidence.
        Args:
            image: Input image as numpy array
        Returns:
            List of TextMatch objects for all detected text that passes
            the confidence, length, and pattern filters.
        """
        processed_img = self._preprocess_image(image)
        try:
            data = pytesseract.image_to_data(
                processed_img,
                lang=self.config.language,
                config=self.config.tesseract_config,
                output_type=pytesseract.Output.DICT
            )
            text_regions = []
            for i in range(len(data['text'])):
                text = data['text'][i].strip()
                confidence = float(data['conf'][i])
                if (confidence < self.config.min_confidence or
                    len(text) < self.config.min_text_length):
                    continue
                if not self._passes_text_filters(text):
                    continue
                bbox = (
                    data['left'][i],
                    data['top'][i],
                    data['width'][i],
                    data['height'][i]
                )
                text_regions.append(TextMatch(text, confidence, bbox))
            return text_regions
        except Exception as e:
            logger.error(f"Text region detection failed: {e}")
            return []
    def _preprocess_image(self, image: np.ndarray,
                          region: Optional[Tuple[int, int, int, int]] = None) -> Image.Image:
        """Preprocess image for better OCR accuracy.
        Args:
            image: Input image as numpy array (BGR)
            region: Optional region to extract
        Returns:
            Preprocessed PIL Image (grayscale unless preprocessing disabled)
        """
        # Extract region if specified
        if region:
            x, y, w, h = region
            image = image[y:y+h, x:x+w]
        if not self.config.preprocess:
            return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Scale up for better OCR
        if self.config.scale_factor > 1.0:
            height, width = gray.shape
            new_width = int(width * self.config.scale_factor)
            new_height = int(height * self.config.scale_factor)
            gray = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
        # Denoise
        if self.config.denoise:
            gray = cv2.fastNlMeansDenoising(gray)
        # Enhance contrast
        if self.config.contrast_enhance:
            # Use CLAHE (Contrast Limited Adaptive Histogram Equalization)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            gray = clahe.apply(gray)
        # Convert back to PIL Image
        return Image.fromarray(gray)
    def _clean_text(self, text: str) -> str:
        """Clean extracted text.
        Args:
            text: Raw extracted text
        Returns:
            Cleaned text (whitespace-collapsed, common artifacts removed)
        """
        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text.strip())
        # Remove common OCR artifacts
        text = re.sub(r'[|¦]', 'I', text)  # Vertical bars to I
        text = re.sub(r'[{}]', '', text)  # Remove braces
        return text
    def _passes_text_filters(self, text: str) -> bool:
        """Check if text passes configured filters.
        Args:
            text: Text to check
        Returns:
            True if text matches at least one whitelist pattern, or no
            patterns are configured at all.
        """
        if not self.config.filter_patterns:
            return True
        for pattern in self.config.filter_patterns:
            if re.match(pattern, text):
                return True
        return False
class TextDetector:
    """High-level text detection interface built on OCREngine."""
    def __init__(self, ocr_config: Optional[OCRConfig] = None):
        """Initialize text detector.
        Args:
            ocr_config: OCR configuration
        """
        self.ocr = OCREngine(ocr_config)
        # NOTE(review): declared but never read or written anywhere in this
        # class yet — either wire it up or remove it.
        self.text_cache: Dict[str, List[TextMatch]] = {}
    def contains_text(self, image: np.ndarray, text: str,
                      case_sensitive: bool = False) -> bool:
        """Check if image contains specific text.
        Args:
            image: Input image
            text: Text to search for
            case_sensitive: Case sensitive search
        Returns:
            True if text found
        """
        matches = self.ocr.find_text(image, text, case_sensitive)
        return len(matches) > 0
    def wait_for_text(self, capture_func, text: str, timeout: float = 10.0,
                      check_interval: float = 0.5) -> bool:
        """Wait for specific text to appear on screen.
        Args:
            capture_func: Function that returns screenshot
            text: Text to wait for (case-insensitive; contains_text default)
            timeout: Maximum wait time in seconds
            check_interval: Time between checks in seconds
        Returns:
            True if text appeared, False if timeout
        """
        # Local import: `time` is not imported at this module's top level.
        import time
        start_time = time.time()
        while time.time() - start_time < timeout:
            image = capture_func()
            if self.contains_text(image, text):
                return True
            time.sleep(check_interval)
        return False
    def get_ui_text(self, image: np.ndarray) -> Dict[str, str]:
        """Extract common UI text elements.
        Args:
            image: Input image
        Returns:
            Dictionary mapping UI elements to text
        """
        # This is a placeholder for game-specific UI text extraction
        # In practice, this would define regions for health, mana, inventory, etc.
        text_regions = self.ocr.get_text_regions(image)
        ui_text = {}
        for region in text_regions:
            # Categorize text based on position or pattern
            if "health" in region.text.lower():
                ui_text["health"] = region.text
            elif "mana" in region.text.lower():
                ui_text["mana"] = region.text
            # Add more UI element detection
        return ui_text

403
engine/screen/template.py Normal file
View file

@ -0,0 +1,403 @@
"""Template matching for UI element detection in game screenshots.
Provides efficient template matching using OpenCV with support for
multiple templates, confidence thresholds, and template management.
"""
from typing import List, Dict, Optional, Tuple, NamedTuple
from pathlib import Path
import logging
from dataclasses import dataclass
import cv2
import numpy as np
logger = logging.getLogger(__name__)
class TemplateMatch(NamedTuple):
    """Represents a template match with position and confidence."""
    template_name: str  # name of the matched template ("" until assigned)
    confidence: float  # matching score from cv2.matchTemplate
    center: Tuple[int, int]  # (x, y) center position
    bbox: Tuple[int, int, int, int]  # (x, y, width, height)
@dataclass
class TemplateInfo:
    """Information about a loaded template."""
    name: str  # identifier used for lookups
    image: np.ndarray  # template pixels as loaded by cv2.imread (BGR)
    width: int  # template width in pixels
    height: int  # template height in pixels
    path: Optional[str] = None  # source file path, if loaded from disk
class TemplateMatcher:
    """Core template matching functionality."""
    def __init__(self, method: int = cv2.TM_CCOEFF_NORMED,
                 threshold: float = 0.8):
        """Initialize template matcher.
        Args:
            method: OpenCV template matching method
            threshold: Minimum confidence threshold (0.0 to 1.0)
        """
        self.method = method
        self.threshold = threshold
    def match_template(self, image: np.ndarray, template: np.ndarray,
                       threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Match single template in image.
        Args:
            image: Source image to search in
            template: Template image to find
            threshold: Confidence threshold override
        Returns:
            List of matches found (template_name is left empty here;
            TemplateManager fills it in)
        """
        if threshold is None:
            threshold = self.threshold
        # Convert to grayscale if needed
        if len(image.shape) == 3:
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            image_gray = image
        if len(template.shape) == 3:
            template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
        else:
            template_gray = template
        # Perform template matching
        result = cv2.matchTemplate(image_gray, template_gray, self.method)
        # Find matches above threshold
        # NOTE(review): assumes a "higher is better" method such as
        # TM_CCOEFF_NORMED; TM_SQDIFF* would need this comparison inverted.
        locations = np.where(result >= threshold)
        matches = []
        template_h, template_w = template_gray.shape
        for pt in zip(*locations[::-1]):  # Switch x and y
            x, y = pt
            confidence = result[y, x]
            center = (x + template_w // 2, y + template_h // 2)
            bbox = (x, y, template_w, template_h)
            matches.append(TemplateMatch("", confidence, center, bbox))
        # Remove overlapping matches (Non-Maximum Suppression)
        matches = self._apply_nms(matches, overlap_threshold=0.3)
        return matches
    def match_multiple_scales(self, image: np.ndarray, template: np.ndarray,
                              scales: Optional[List[float]] = None,
                              threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Match template at multiple scales.
        Args:
            image: Source image
            template: Template image
            scales: List of scale factors to try (defaults to 0.8-1.2)
            threshold: Confidence threshold
        Returns:
            List of matches at all scales
        """
        if scales is None:
            scales = [0.8, 0.9, 1.0, 1.1, 1.2]
        all_matches = []
        for scale in scales:
            # Scale template
            new_width = int(template.shape[1] * scale)
            new_height = int(template.shape[0] * scale)
            if new_width < 10 or new_height < 10:
                continue  # Skip very small templates
            scaled_template = cv2.resize(template, (new_width, new_height))
            # Find matches at this scale
            matches = self.match_template(image, scaled_template, threshold)
            all_matches.extend(matches)
        # Apply NMS across all scales
        all_matches = self._apply_nms(all_matches, overlap_threshold=0.5)
        return all_matches
    def _apply_nms(self, matches: List[TemplateMatch],
                   overlap_threshold: float = 0.3) -> List[TemplateMatch]:
        """Apply Non-Maximum Suppression to remove overlapping matches.
        Args:
            matches: List of template matches
            overlap_threshold: Maximum allowed overlap ratio
        Returns:
            Filtered list of matches (highest-confidence winner per cluster)
        """
        if not matches:
            return matches
        # Sort by confidence (highest first)
        matches = sorted(matches, key=lambda x: x.confidence, reverse=True)
        filtered_matches = []
        for match in matches:
            # Check if this match overlaps significantly with any kept match
            is_duplicate = False
            for kept_match in filtered_matches:
                if self._calculate_overlap(match, kept_match) > overlap_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                filtered_matches.append(match)
        return filtered_matches
    def _calculate_overlap(self, match1: TemplateMatch, match2: TemplateMatch) -> float:
        """Calculate overlap ratio between two matches.
        Args:
            match1: First match
            match2: Second match
        Returns:
            Overlap ratio (0.0 to 1.0) — intersection over union of the bboxes
        """
        x1, y1, w1, h1 = match1.bbox
        x2, y2, w2, h2 = match2.bbox
        # Calculate intersection
        left = max(x1, x2)
        right = min(x1 + w1, x2 + w2)
        top = max(y1, y2)
        bottom = min(y1 + h1, y2 + h2)
        if left >= right or top >= bottom:
            return 0.0
        intersection = (right - left) * (bottom - top)
        area1 = w1 * h1
        area2 = w2 * h2
        union = area1 + area2 - intersection
        return intersection / union if union > 0 else 0.0
class TemplateManager:
    """Manages a collection of templates for game UI detection."""
    def __init__(self, template_dir: Optional[Path] = None):
        """Initialize template manager.
        Args:
            template_dir: Directory containing template images; if it
                exists, all *.png files in it are loaded eagerly.
        """
        self.template_dir = template_dir
        self.templates: Dict[str, TemplateInfo] = {}
        self.matcher = TemplateMatcher()
        if template_dir and template_dir.exists():
            self.load_templates_from_directory(template_dir)
    def load_template(self, name: str, image_path: Path) -> bool:
        """Load single template from file.
        Args:
            name: Template identifier (overwrites any existing entry)
            image_path: Path to template image
        Returns:
            True if loaded successfully
        """
        try:
            image = cv2.imread(str(image_path))
            # cv2.imread returns None (no exception) for unreadable files.
            if image is None:
                logger.error(f"Could not load template image: {image_path}")
                return False
            height, width = image.shape[:2]
            self.templates[name] = TemplateInfo(
                name=name,
                image=image,
                width=width,
                height=height,
                path=str(image_path)
            )
            logger.info(f"Loaded template '{name}' ({width}x{height})")
            return True
        except Exception as e:
            logger.error(f"Failed to load template '{name}': {e}")
            return False
    def load_templates_from_directory(self, directory: Path) -> int:
        """Load all templates from directory.
        Args:
            directory: Directory containing template images (*.png only)
        Returns:
            Number of templates loaded
        """
        loaded_count = 0
        for image_path in directory.glob("*.png"):
            # Template name is the filename without extension.
            template_name = image_path.stem
            if self.load_template(template_name, image_path):
                loaded_count += 1
        logger.info(f"Loaded {loaded_count} templates from {directory}")
        return loaded_count
    def find_template(self, image: np.ndarray, template_name: str,
                      threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Find specific template in image.
        Args:
            image: Source image
            template_name: Name of template to find
            threshold: Confidence threshold override
        Returns:
            List of matches found (empty if template unknown)
        """
        if template_name not in self.templates:
            logger.warning(f"Template '{template_name}' not found")
            return []
        template_info = self.templates[template_name]
        matches = self.matcher.match_template(image, template_info.image, threshold)
        # Set template name in matches (matcher leaves it empty; TemplateMatch
        # is a NamedTuple, so rebuild rather than mutate).
        named_matches = []
        for match in matches:
            named_match = TemplateMatch(
                template_name=template_name,
                confidence=match.confidence,
                center=match.center,
                bbox=match.bbox
            )
            named_matches.append(named_match)
        return named_matches
    def find_any_template(self, image: np.ndarray,
                          template_names: Optional[List[str]] = None,
                          threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Find any of the specified templates in image.
        Args:
            image: Source image
            template_names: List of template names to search for, or None for all
            threshold: Confidence threshold override
        Returns:
            List of all matches found, sorted best-first
        """
        if template_names is None:
            template_names = list(self.templates.keys())
        all_matches = []
        for template_name in template_names:
            matches = self.find_template(image, template_name, threshold)
            all_matches.extend(matches)
        # Sort by confidence
        all_matches.sort(key=lambda x: x.confidence, reverse=True)
        return all_matches
    def wait_for_template(self, capture_func, template_name: str,
                          timeout: float = 10.0, check_interval: float = 0.5,
                          threshold: Optional[float] = None) -> Optional[TemplateMatch]:
        """Wait for template to appear on screen.
        Args:
            capture_func: Function that returns screenshot
            template_name: Template to wait for
            timeout: Maximum wait time in seconds
            check_interval: Time between checks in seconds
            threshold: Confidence threshold override
        Returns:
            First match found, or None if timeout
        """
        # Local import: `time` is not imported at this module's top level.
        import time
        start_time = time.time()
        while time.time() - start_time < timeout:
            image = capture_func()
            matches = self.find_template(image, template_name, threshold)
            if matches:
                return matches[0]  # Return best match
            time.sleep(check_interval)
        return None
    def get_template_info(self, template_name: str) -> Optional[TemplateInfo]:
        """Get information about loaded template.
        Args:
            template_name: Name of template
        Returns:
            TemplateInfo object or None if not found
        """
        return self.templates.get(template_name)
    def list_templates(self) -> List[str]:
        """Get list of all loaded template names.
        Returns:
            List of template names
        """
        return list(self.templates.keys())
    def create_debug_image(self, image: np.ndarray, matches: List[TemplateMatch]) -> np.ndarray:
        """Create debug image showing template matches.
        Args:
            image: Original image (left untouched; a copy is annotated)
            matches: List of matches to highlight
        Returns:
            Debug image with matches drawn
        """
        debug_img = image.copy()
        for match in matches:
            x, y, w, h = match.bbox
            # Draw bounding box
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Draw center point
            center_x, center_y = match.center
            cv2.circle(debug_img, (center_x, center_y), 5, (255, 0, 0), -1)
            # Draw template name and confidence
            label = f"{match.template_name}: {match.confidence:.2f}"
            cv2.putText(debug_img, label, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        return debug_img

0
engine/state/__init__.py Normal file
View file

34
engine/state/events.py Normal file
View file

@ -0,0 +1,34 @@
"""Event system for inter-component communication."""
from typing import Callable, Any, Dict, List
import logging
logger = logging.getLogger(__name__)
class EventBus:
    """Minimal publish/subscribe hub for decoupled component messaging."""

    def __init__(self):
        # Maps event name -> ordered list of subscriber callables.
        self._listeners: Dict[str, List[Callable]] = {}

    def on(self, event: str, callback: Callable) -> None:
        """Register *callback* to run whenever *event* is emitted."""
        if event not in self._listeners:
            self._listeners[event] = []
        self._listeners[event].append(callback)

    def off(self, event: str, callback: Callable) -> None:
        """Remove every registration of *callback* for *event*."""
        handlers = self._listeners.get(event)
        if handlers is not None:
            self._listeners[event] = [h for h in handlers if h != callback]

    def emit(self, event: str, **data: Any) -> None:
        """Invoke every subscriber of *event*, isolating handler failures."""
        for handler in self._listeners.get(event, []):
            try:
                handler(**data)
            except Exception as e:
                # One bad handler must not break the others.
                logger.error(f"Event handler error for '{event}': {e}")

    def clear(self) -> None:
        """Remove all listeners."""
        self._listeners.clear()

105
engine/state/manager.py Normal file
View file

@ -0,0 +1,105 @@
"""Game state machine management.
Provides a base state manager that game implementations extend
to detect and track game states (menu, in-game, inventory, etc.).
"""
from typing import Optional, Callable, Dict, Any
from enum import Enum, auto
from dataclasses import dataclass
import logging
import time
import numpy as np
logger = logging.getLogger(__name__)
class BaseGameState(Enum):
    """Base states common to most games.

    Game implementations may extend detection logic around these; values
    are auto-assigned and carry no meaning beyond identity.
    """
    UNKNOWN = auto()  # detection has not run or could not classify the screen
    LOADING = auto()
    MAIN_MENU = auto()
    CHARACTER_SELECT = auto()
    IN_GAME = auto()
    INVENTORY = auto()
    DEAD = auto()
    DISCONNECTED = auto()
@dataclass
class StateTransition:
    """Records a state transition."""
    from_state: BaseGameState  # state being left
    to_state: BaseGameState  # state being entered
    timestamp: float  # time.time() at the moment of transition
    # Optional extra context; remains None (not {}) when absent.
    metadata: Optional[Dict[str, Any]] = None
class GameStateManager:
    """Base class for game state detection and management.

    Game implementations should subclass this and implement
    detect_state() with game-specific screen analysis. The base class
    tracks the current/previous state, a transition history, and
    per-state entry callbacks.
    """

    def __init__(self):
        now = time.time()
        self._current_state: BaseGameState = BaseGameState.UNKNOWN
        self._previous_state: BaseGameState = BaseGameState.UNKNOWN
        self._state_enter_time: float = now
        self._history: list[StateTransition] = []  # ordered transition log
        self._callbacks: Dict[BaseGameState, list[Callable]] = {}

    @property
    def current_state(self) -> BaseGameState:
        return self._current_state

    @property
    def previous_state(self) -> BaseGameState:
        return self._previous_state

    @property
    def time_in_state(self) -> float:
        """Seconds spent in current state."""
        return time.time() - self._state_enter_time

    def detect_state(self, screen: np.ndarray) -> BaseGameState:
        """Detect current game state from screenshot.
        Must be overridden by game implementations.
        """
        raise NotImplementedError("Subclasses must implement detect_state()")

    def update(self, screen: np.ndarray) -> BaseGameState:
        """Update state from current screen. Triggers callbacks on change."""
        detected = self.detect_state(screen)
        if detected == self._current_state:
            return self._current_state
        change = StateTransition(
            from_state=self._current_state,
            to_state=detected,
            timestamp=time.time(),
        )
        self._history.append(change)
        logger.info(f"State: {self._current_state.name}{detected.name}")
        # Commit the transition before notifying listeners so callbacks
        # observe the post-change state.
        self._previous_state = self._current_state
        self._current_state = detected
        self._state_enter_time = time.time()
        for handler in self._callbacks.get(detected, []):
            try:
                handler(change)
            except Exception as e:
                # A failing callback must not abort state tracking.
                logger.error(f"State callback error: {e}")
        return self._current_state

    def on_state(self, state: BaseGameState, callback: Callable) -> None:
        """Register a callback for when entering a state."""
        self._callbacks.setdefault(state, []).append(callback)

    def is_state(self, state: BaseGameState) -> bool:
        return self._current_state == state

View file

87
engine/vision/color.py Normal file
View file

@ -0,0 +1,87 @@
"""Color and pixel analysis utilities.
Provides tools for reading health/mana bars, detecting UI states
via color sampling, and pixel-level game state detection.
"""
from typing import Tuple, Optional, List
import logging
import numpy as np
import cv2
logger = logging.getLogger(__name__)
class ColorAnalyzer:
    """Analyze pixel colors and UI bar states."""

    @staticmethod
    def get_pixel_color(screen: np.ndarray, x: int, y: int) -> Tuple[int, int, int]:
        """Get BGR color at pixel position."""
        b, g, r = screen[y, x].tolist()
        return (b, g, r)

    @staticmethod
    def get_pixel_hsv(screen: np.ndarray, x: int, y: int) -> Tuple[int, int, int]:
        """Get HSV color at pixel position."""
        single_pixel = screen[y:y+1, x:x+1]
        converted = cv2.cvtColor(single_pixel, cv2.COLOR_BGR2HSV)
        return tuple(converted[0, 0].tolist())

    @staticmethod
    def color_matches(
        color: Tuple[int, int, int],
        target: Tuple[int, int, int],
        tolerance: int = 20,
    ) -> bool:
        """Check if a color matches target within tolerance."""
        for actual, expected in zip(color, target):
            if abs(actual - expected) > tolerance:
                return False
        return True

    @staticmethod
    def read_bar_percentage(
        screen: np.ndarray,
        bar_region: Tuple[int, int, int, int],
        filled_color_hsv: Tuple[Tuple[int, int, int], Tuple[int, int, int]],
    ) -> float:
        """Read a horizontal bar's fill percentage (health, mana, xp, etc.).

        Args:
            screen: Screenshot in BGR
            bar_region: (x, y, width, height) of the bar
            filled_color_hsv: (lower_hsv, upper_hsv) range of the filled portion

        Returns:
            Fill percentage 0.0 to 1.0
        """
        rx, ry, rw, rh = bar_region
        strip = cv2.cvtColor(screen[ry:ry+rh, rx:rx+rw], cv2.COLOR_BGR2HSV)
        lower, upper = filled_color_hsv
        mask = cv2.inRange(strip, np.array(lower), np.array(upper))
        # Fraction of "filled" pixels per column, scanned left to right.
        column_fill = np.mean(mask, axis=0) / 255.0
        # The fill level is the rightmost column that is mostly filled.
        filled_columns = np.where(column_fill > 0.3)[0]
        if filled_columns.size == 0:
            return 0.0
        return (filled_columns[-1] + 1) / rw

    @staticmethod
    def sample_region_dominant_color(
        screen: np.ndarray,
        region: Tuple[int, int, int, int],
    ) -> Tuple[int, int, int]:
        """Get the dominant BGR color in a region."""
        rx, ry, rw, rh = region
        patch = screen[ry:ry+rh, rx:rx+rw]
        samples = patch.reshape(-1, 3).astype(np.float32)
        # k-means with k=1 collapses the region to a single dominant center.
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
        _, _, centers = cv2.kmeans(samples, 1, None, criteria, 3, cv2.KMEANS_RANDOM_CENTERS)
        return tuple(centers[0].astype(int).tolist())

140
engine/vision/detector.py Normal file
View file

@ -0,0 +1,140 @@
"""Object and UI element detection using computer vision.
Provides high-level detection for game elements using template matching,
color filtering, and contour analysis.
"""
from typing import List, Optional, Tuple
from dataclasses import dataclass
import logging
import numpy as np
import cv2
logger = logging.getLogger(__name__)
@dataclass
class Detection:
    """A detected object/element on screen: bounding rect plus match score."""
    x: int              # top-left corner, pixels
    y: int
    width: int
    height: int
    confidence: float   # match/fill score produced by the detector
    label: str = ""     # optional template or category name

    @property
    def center(self) -> Tuple[int, int]:
        """Midpoint of the bounding rectangle as (x, y)."""
        cx = self.x + self.width // 2
        cy = self.y + self.height // 2
        return (cx, cy)

    @property
    def bounds(self) -> Tuple[int, int, int, int]:
        """Rectangle as (left, top, right, bottom)."""
        return (self.x, self.y, self.x + self.width, self.y + self.height)
class ElementDetector:
    """Detects game UI elements and objects via computer vision."""
    def __init__(self, confidence_threshold: float = 0.8):
        # Minimum normalized match score for a detection to count.
        self.confidence_threshold = confidence_threshold
        # Loaded template images keyed by caller-chosen name (BGR arrays).
        self._templates: dict[str, np.ndarray] = {}
    def load_template(self, name: str, image_path: str) -> None:
        """Load a template image for matching.
        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        template = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread returns None (no exception) for unreadable files.
        if template is None:
            raise FileNotFoundError(f"Template not found: {image_path}")
        self._templates[name] = template
        logger.debug(f"Loaded template '{name}': {template.shape}")
    def find_template(
        self, screen: np.ndarray, template_name: str,
        method: int = cv2.TM_CCOEFF_NORMED,
    ) -> Optional[Detection]:
        """Find best match of a template in the screen image.
        Returns None for unknown templates or sub-threshold matches.
        # NOTE(review): assumes a "higher is better" method; TM_SQDIFF*
        # would need min_val/min_loc instead — confirm before switching.
        """
        if template_name not in self._templates:
            logger.error(f"Unknown template: {template_name}")
            return None
        template = self._templates[template_name]
        result = cv2.matchTemplate(screen, template, method)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if max_val >= self.confidence_threshold:
            h, w = template.shape[:2]
            return Detection(
                x=max_loc[0], y=max_loc[1],
                width=w, height=h,
                confidence=max_val, label=template_name,
            )
        return None
    def find_all_templates(
        self, screen: np.ndarray, template_name: str,
        method: int = cv2.TM_CCOEFF_NORMED,
    ) -> List[Detection]:
        """Find all matches of a template above confidence threshold."""
        if template_name not in self._templates:
            return []
        template = self._templates[template_name]
        h, w = template.shape[:2]
        result = cv2.matchTemplate(screen, template, method)
        locations = np.where(result >= self.confidence_threshold)
        detections = []
        # locations is (rows, cols); reversed zip yields (x, y) points.
        for pt in zip(*locations[::-1]):
            detections.append(Detection(
                x=pt[0], y=pt[1], width=w, height=h,
                # result is indexed [row, col] == [y, x].
                confidence=result[pt[1], pt[0]], label=template_name,
            ))
        # Non-maximum suppression (simple distance-based)
        return self._nms(detections, distance_threshold=min(w, h) // 2)
    def find_by_color(
        self, screen: np.ndarray, lower_hsv: Tuple[int, int, int],
        upper_hsv: Tuple[int, int, int], min_area: int = 100,
        label: str = "",
    ) -> List[Detection]:
        """Find objects by HSV color range.
        Confidence here is the contour's fill ratio (contour area divided
        by its bounding-box area), not a template match score.
        """
        hsv = cv2.cvtColor(screen, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, np.array(lower_hsv), np.array(upper_hsv))
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        detections = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if area >= min_area:
                x, y, w, h = cv2.boundingRect(contour)
                detections.append(Detection(
                    x=x, y=y, width=w, height=h,
                    confidence=area / (w * h), label=label,
                ))
        return detections
    def _nms(self, detections: List[Detection], distance_threshold: int) -> List[Detection]:
        """Simple non-maximum suppression by distance.
        Keeps the highest-confidence detection and drops any later one
        whose center lies within the threshold on both axes.
        """
        if not detections:
            return []
        detections.sort(key=lambda d: d.confidence, reverse=True)
        kept = []
        for det in detections:
            too_close = False
            for k in kept:
                dx = abs(det.center[0] - k.center[0])
                dy = abs(det.center[1] - k.center[1])
                if dx < distance_threshold and dy < distance_threshold:
                    too_close = True
                    break
            if not too_close:
                kept.append(det)
        return kept