Initial project structure: reusable isometric bot engine with D2R implementation

This commit is contained in:
Hoid 2026-02-14 08:50:36 +00:00
commit e0282a7111
44 changed files with 3433 additions and 0 deletions

35
engine/__init__.py Normal file
View file

@ -0,0 +1,35 @@
"""ISO Bot Engine - Core reusable components for isometric game bots.
This module provides the fundamental building blocks for creating bots that work with
isometric games through screen reading and human-like input simulation.
The engine is designed to be game-agnostic, with game-specific implementations
built on top of these core components.
Main Components:
- screen: Screenshot capture, OCR, template matching
- input: Human-like mouse/keyboard input simulation
- vision: Computer vision utilities for object detection
- state: Game state management with event system
- navigation: Pathfinding and movement control
- safety: Anti-detection measures and timing randomization
"""
__version__ = "0.1.0"
__author__ = "Hoid"
from .screen.capture import ScreenCapture
from .input.humanize import HumanInput
from .state.manager import GameStateManager
from .vision.detector import ObjectDetector
from .navigation.pathfinder import Pathfinder
from .safety.timing import SafetyTimer
__all__ = [
"ScreenCapture",
"HumanInput",
"GameStateManager",
"ObjectDetector",
"Pathfinder",
"SafetyTimer",
]

23
engine/input/__init__.py Normal file
View file

@ -0,0 +1,23 @@
"""Human-like input simulation for mouse and keyboard interactions.
This module provides tools for generating realistic input patterns that mimic
human behavior, including natural mouse movement curves and timing variations.
Components:
- mouse: Human-like mouse movement with Bézier curves
- keyboard: Keyboard input with realistic timing patterns
- humanize: Central controller for randomized, human-like interactions
"""
from .mouse import MouseController, MousePath
from .keyboard import KeyboardController, KeySequence
from .humanize import HumanInput, InputConfig
__all__ = [
"MouseController",
"MousePath",
"KeyboardController",
"KeySequence",
"HumanInput",
"InputConfig",
]

112
engine/input/humanize.py Normal file
View file

@ -0,0 +1,112 @@
"""Human-like behavior patterns for input simulation.
Provides randomization utilities to make bot inputs appear natural,
including variable delays, mouse jitter, and activity scheduling.
"""
import random
import time
import logging
from typing import Tuple, Optional
from dataclasses import dataclass, field
logger = logging.getLogger(__name__)
@dataclass
class HumanProfile:
    """Defines a human behavior profile for input randomization.

    Consumed by Humanizer; every (min, max) pair here is sampled with
    random.uniform each time a value is needed.
    """
    # Reaction time range in seconds
    reaction_min: float = 0.15
    reaction_max: float = 0.45
    # Mouse movement speed range (pixels per second)
    mouse_speed_min: float = 400.0
    mouse_speed_max: float = 1200.0
    # Click position jitter in pixels (applied as a +/- offset on each axis)
    click_jitter: int = 3
    # Chance of double-reading (hesitation before action)
    hesitation_chance: float = 0.1
    # Extra delay range (seconds) added when hesitation triggers
    hesitation_duration: Tuple[float, float] = (0.3, 1.2)
    # Break scheduling: (min, max) interval between breaks / break lengths
    micro_break_interval: Tuple[int, int] = (120, 300)  # seconds
    micro_break_duration: Tuple[int, int] = (2, 8)  # seconds
    long_break_interval: Tuple[int, int] = (1800, 3600)  # seconds
    long_break_duration: Tuple[int, int] = (60, 300)  # seconds
class Humanizer:
    """Applies human-like randomization to bot actions.

    Tracks break schedules as absolute timestamps and adds reaction
    delays, click jitter, and a mild fatigue factor to actions.
    """

    def __init__(self, profile: Optional[HumanProfile] = None):
        """Initialize with a behavior profile (defaults to HumanProfile()).

        Args:
            profile: Behavior parameters, or None for the defaults.
        """
        self.profile = profile or HumanProfile()
        self._last_micro_break = time.time()
        self._last_long_break = time.time()
        # Absolute timestamps of the next scheduled breaks.
        self._next_micro_break = time.time() + self._schedule_break(self.profile.micro_break_interval)
        self._next_long_break = time.time() + self._schedule_break(self.profile.long_break_interval)
        self._action_count = 0

    def reaction_delay(self) -> float:
        """Generate a human-like reaction delay in seconds."""
        base = random.uniform(self.profile.reaction_min, self.profile.reaction_max)
        # Occasionally add hesitation
        if random.random() < self.profile.hesitation_chance:
            base += random.uniform(*self.profile.hesitation_duration)
        # Slight fatigue factor based on actions performed (caps at +30%)
        fatigue = min(self._action_count / 1000, 0.3)
        base *= (1 + fatigue * random.random())
        return base

    def jitter_position(self, x: int, y: int) -> Tuple[int, int]:
        """Add a small random offset to a click position."""
        jitter = self.profile.click_jitter
        return (
            x + random.randint(-jitter, jitter),
            y + random.randint(-jitter, jitter),
        )

    def mouse_speed(self) -> float:
        """Get a randomized mouse movement speed (pixels per second)."""
        return random.uniform(
            self.profile.mouse_speed_min,
            self.profile.mouse_speed_max,
        )

    def should_take_break(self) -> Optional[float]:
        """Check if it's time for a break. Returns break duration or None.

        Fix: _schedule_break now returns a *relative* interval. Previously
        it returned an absolute timestamp that was added to `now + duration`
        here, double-counting wall-clock time and pushing the next break
        decades into the future (breaks never fired a second time).
        """
        now = time.time()
        if now >= self._next_long_break:
            duration = random.uniform(*self.profile.long_break_duration)
            self._next_long_break = now + duration + self._schedule_break(
                self.profile.long_break_interval
            )
            logger.info(f"Long break: {duration:.0f}s")
            return duration
        if now >= self._next_micro_break:
            duration = random.uniform(*self.profile.micro_break_duration)
            self._next_micro_break = now + duration + self._schedule_break(
                self.profile.micro_break_interval
            )
            logger.debug(f"Micro break: {duration:.1f}s")
            return duration
        return None

    def wait(self) -> None:
        """Sleep for a human-like reaction delay and count the action."""
        delay = self.reaction_delay()
        time.sleep(delay)
        self._action_count += 1

    def _schedule_break(self, interval: Tuple[int, int]) -> float:
        """Return a random interval in seconds until the next break.

        Relative offset only; callers add it to a base timestamp.
        """
        return random.uniform(*interval)

368
engine/input/keyboard.py Normal file
View file

@ -0,0 +1,368 @@
"""Human-like keyboard input simulation with realistic timing.
Provides keyboard input with natural typing patterns, including
varied keystroke timing and realistic human typing characteristics.
"""
from typing import List, Dict, Optional, Union
from dataclasses import dataclass
import time
import random
import logging
import pyautogui
from pynput import keyboard
logger = logging.getLogger(__name__)
@dataclass
class KeySequence:
    """Represents a sequence of keys with timing information.

    Built by KeyboardController.generate_key_sequence.
    """
    # Individual keys/characters in typing order.
    keys: List[str]
    # Per-key delay in seconds (same length as keys).
    delays: List[float]
    # Sum of all delays, in seconds.
    total_duration: float
class TypingProfile:
    """Defines typing characteristics for human-like input."""

    def __init__(self, wpm: int = 60, accuracy: float = 0.95):
        """Initialize typing profile.

        Args:
            wpm: Words per minute typing speed
            accuracy: Typing accuracy (0.0 to 1.0)
        """
        self.wpm = wpm
        self.accuracy = accuracy
        # Derive the per-character delay from WPM (5 characters per "word").
        self.base_char_delay = 60.0 / (wpm * 5)
        # Clamp bounds and word-boundary pause used by the controller.
        self.min_delay = 0.02  # Minimum delay between keys
        self.max_delay = 0.5  # Maximum delay between keys
        self.word_pause = 0.1  # Additional pause after spaces
        # Letter combinations that tend to slow a typist down.
        self.difficult_sequences = {
            'th', 'ch', 'sh', 'qu', 'tion', 'ing', 'er', 'ed'
        }
        # Keys that typically take longer to reach.
        self.slow_keys = {
            'q', 'z', 'x', 'j', 'k', 'shift', 'ctrl', 'alt'
        }
class KeyboardController:
    """Controller for human-like keyboard input.

    Wraps pyautogui with timing derived from a TypingProfile so keystrokes
    arrive with realistic, randomized delays; tracks modifier state so held
    keys can always be released.
    """

    def __init__(self, typing_profile: Optional[TypingProfile] = None):
        """Initialize keyboard controller.

        Args:
            typing_profile: Typing characteristics, or None for default
        """
        self.profile = typing_profile or TypingProfile()
        # Modifier state, tracked so release_all_keys() can clean up.
        self.shift_held = False
        self.ctrl_held = False
        self.alt_held = False
        # Key mapping for special keys (name -> literal char or press name)
        self.key_mapping = {
            'enter': '\n',
            'return': '\n',
            'tab': '\t',
            'space': ' ',
            'backspace': 'backspace',
            'delete': 'delete',
            'escape': 'esc',
            'esc': 'esc',
        }

    def calculate_key_delay(self, key: str, previous_key: Optional[str] = None) -> float:
        """Calculate a realistic delay for typing a key.

        Args:
            key: Key to type
            previous_key: Previously typed key for sequence analysis

        Returns:
            Delay in seconds before typing this key, clamped to the
            profile's [min_delay, max_delay] range.
        """
        # Base delay from typing speed
        delay = self.profile.base_char_delay
        # Adjust for key difficulty
        if key.lower() in self.profile.slow_keys:
            delay *= random.uniform(1.2, 1.8)
        # Adjust for difficult sequences
        if previous_key:
            sequence = previous_key.lower() + key.lower()
            if any(seq in sequence for seq in self.profile.difficult_sequences):
                delay *= random.uniform(1.1, 1.5)
        # Add natural variation
        delay *= random.uniform(0.7, 1.4)
        # Extra pause after spaces (word boundaries)
        if previous_key and previous_key == ' ':
            delay += self.profile.word_pause * random.uniform(0.5, 1.5)
        # Clamp to reasonable bounds
        return max(self.profile.min_delay, min(self.profile.max_delay, delay))

    def type_text(self, text: str, include_errors: bool = False) -> None:
        """Type text with human-like timing and optional errors.

        Args:
            text: Text to type
            include_errors: Whether to include typing errors and corrections
        """
        if not text:
            return
        logger.debug(f"Typing text: '{text[:50]}{'...' if len(text) > 50 else ''}'")
        previous_key = None
        for i, char in enumerate(text):
            # Calculate delay before this character
            delay = self.calculate_key_delay(char, previous_key)
            # Sleep before typing
            time.sleep(delay)
            # Occasionally make typing errors if enabled
            if include_errors and self.should_make_error():
                self.make_typing_error(char)
            else:
                self.type_key(char)
            previous_key = char

    def should_make_error(self) -> bool:
        """Determine if a typing error should be made.

        Returns:
            True with probability (1 - profile.accuracy)
        """
        return random.random() > self.profile.accuracy

    def make_typing_error(self, intended_key: str) -> None:
        """Make a typing error and correct it (wrong key, pause, backspace).

        Args:
            intended_key: The key that was supposed to be typed
        """
        # Type wrong key (usually adjacent on keyboard)
        wrong_key = self.get_adjacent_key(intended_key)
        self.type_key(wrong_key)
        # Pause as human realizes mistake
        time.sleep(random.uniform(0.1, 0.4))
        # Backspace to correct
        self.type_key('backspace')
        time.sleep(random.uniform(0.05, 0.15))
        # Type correct key
        self.type_key(intended_key)

    def get_adjacent_key(self, key: str) -> str:
        """Get an adjacent key for typing errors.

        Args:
            key: Original key

        Returns:
            A key physically near `key` on a QWERTY layout ('abcd' fallback
            for characters not in the map).
        """
        # Simplified adjacent key mapping
        adjacent_map = {
            'a': 'sq', 'b': 'vgn', 'c': 'xvd', 'd': 'sfe', 'e': 'wrd',
            'f': 'dgr', 'g': 'fht', 'h': 'gyu', 'i': 'uko', 'j': 'hnu',
            'k': 'jmo', 'l': 'kpo', 'm': 'njk', 'n': 'bhm', 'o': 'ilp',
            'p': 'olo', 'q': 'wa', 'r': 'etf', 's': 'adw', 't': 'rgy',
            'u': 'yhi', 'v': 'cfg', 'w': 'qse', 'x': 'zdc', 'y': 'tgu',
            'z': 'xas'
        }
        adjacent_keys = adjacent_map.get(key.lower(), 'abcd')
        return random.choice(adjacent_keys)

    def type_key(self, key: str) -> None:
        """Type a single key.

        Single characters are written literally; named keys ('f1', 'shift',
        'num5', ...) are pressed so their names are not typed out as text.

        Args:
            key: Key to type
        """
        # Handle special keys
        if key.lower() in self.key_mapping:
            mapped_key = self.key_mapping[key.lower()]
            if mapped_key in ['backspace', 'delete', 'esc']:
                pyautogui.press(mapped_key)
            else:
                pyautogui.write(mapped_key)
        elif len(key) > 1:
            # Fix: unmapped multi-character names (e.g. 'num5' from
            # type_number_sequence, or 'f1') were previously passed to
            # pyautogui.write, which typed the name as literal text.
            pyautogui.press(key)
        else:
            pyautogui.write(key)

    def press_key_combination(self, *keys: str) -> None:
        """Press a combination of keys (e.g., Ctrl+C).

        Args:
            keys: Keys to press together
        """
        logger.debug(f"Pressing key combination: {'+'.join(keys)}")
        # Press all keys down
        for key in keys:
            pyautogui.keyDown(key)
            time.sleep(random.uniform(0.01, 0.03))
        # Hold briefly
        time.sleep(random.uniform(0.05, 0.1))
        # Release all keys (in reverse order)
        for key in reversed(keys):
            pyautogui.keyUp(key)
            time.sleep(random.uniform(0.01, 0.03))

    def press_key(self, key: str, duration: Optional[float] = None) -> None:
        """Press and release a key.

        Args:
            key: Key to press
            duration: How long to hold key, or None for quick press
        """
        if duration is None:
            pyautogui.press(key)
        else:
            pyautogui.keyDown(key)
            time.sleep(duration)
            pyautogui.keyUp(key)

    def hold_key(self, key: str) -> None:
        """Start holding a key down and record modifier state.

        Args:
            key: Key to hold
        """
        pyautogui.keyDown(key)
        # Track modifier keys
        if key.lower() == 'shift':
            self.shift_held = True
        elif key.lower() in ['ctrl', 'control']:
            self.ctrl_held = True
        elif key.lower() == 'alt':
            self.alt_held = True

    def release_key(self, key: str) -> None:
        """Stop holding a key and record modifier state.

        Args:
            key: Key to release
        """
        pyautogui.keyUp(key)
        # Track modifier keys
        if key.lower() == 'shift':
            self.shift_held = False
        elif key.lower() in ['ctrl', 'control']:
            self.ctrl_held = False
        elif key.lower() == 'alt':
            self.alt_held = False

    def release_all_keys(self) -> None:
        """Release all held modifier keys."""
        if self.shift_held:
            self.release_key('shift')
        if self.ctrl_held:
            self.release_key('ctrl')
        if self.alt_held:
            self.release_key('alt')

    def type_number_sequence(self, numbers: Union[str, int],
                             use_numpad: bool = False) -> None:
        """Type a sequence of numbers.

        Args:
            numbers: Numbers to type
            use_numpad: Whether to use numpad keys ('num0'..'num9')
        """
        number_str = str(numbers)
        for digit in number_str:
            if digit.isdigit():
                if use_numpad:
                    key = f'num{digit}'
                else:
                    key = digit
                self.type_key(key)
                time.sleep(self.calculate_key_delay(digit))

    def simulate_pause(self, pause_type: str = 'thinking') -> None:
        """Simulate natural pauses in typing.

        Args:
            pause_type: Type of pause ('thinking', 'reading', 'short')
        """
        if pause_type == 'thinking':
            duration = random.uniform(0.5, 2.0)
        elif pause_type == 'reading':
            duration = random.uniform(0.2, 0.8)
        else:  # short
            duration = random.uniform(0.1, 0.3)
        logger.debug(f"Simulating {pause_type} pause for {duration:.2f}s")
        time.sleep(duration)

    def generate_key_sequence(self, text: str) -> KeySequence:
        """Generate a key sequence with timing for given text (no typing).

        Args:
            text: Text to generate sequence for

        Returns:
            KeySequence with keys and delays; total_duration == sum(delays)
        """
        keys = list(text)
        delays = []
        total_duration = 0.0
        previous_key = None
        for key in keys:
            delay = self.calculate_key_delay(key, previous_key)
            delays.append(delay)
            total_duration += delay
            previous_key = key
        return KeySequence(keys, delays, total_duration)

    def set_typing_speed(self, wpm: int) -> None:
        """Set typing speed, clamped to 10-200 WPM.

        Args:
            wpm: Words per minute
        """
        self.profile.wpm = max(10, min(200, wpm))
        chars_per_minute = self.profile.wpm * 5
        self.profile.base_char_delay = 60.0 / chars_per_minute
        logger.info(f"Typing speed set to {self.profile.wpm} WPM")

    def set_accuracy(self, accuracy: float) -> None:
        """Set typing accuracy, clamped to [0.0, 1.0].

        Args:
            accuracy: Accuracy from 0.0 to 1.0
        """
        self.profile.accuracy = max(0.0, min(1.0, accuracy))
        logger.info(f"Typing accuracy set to {self.profile.accuracy * 100:.1f}%")

345
engine/input/mouse.py Normal file
View file

@ -0,0 +1,345 @@
"""Human-like mouse movement and clicking with Bézier curves.
Provides realistic mouse movement patterns using Bézier curves with
randomized control points and natural acceleration/deceleration.
"""
from typing import Tuple, List, Optional, Callable
from dataclasses import dataclass
import time
import math
import random
import logging
import pyautogui
import numpy as np
logger = logging.getLogger(__name__)
# Disable pyautogui failsafe for production use
pyautogui.FAILSAFE = False
@dataclass
class MousePath:
    """Represents a mouse movement path with timing.

    Produced by MouseController.generate_movement_path and stepped through
    by move_to/drag, which sleep delays[i] between consecutive points.
    """
    # Successive on-screen (x, y) positions along the curve.
    points: List[Tuple[int, int]]
    # Sleep time (seconds) between consecutive points; len == len(points) - 1.
    delays: List[float]
    # Nominal duration of the whole movement in seconds.
    total_duration: float
class BezierCurve:
    """Bézier curve generation for natural mouse movement."""

    @staticmethod
    def cubic_bezier(t: float, p0: Tuple[float, float], p1: Tuple[float, float],
                     p2: Tuple[float, float], p3: Tuple[float, float]) -> Tuple[float, float]:
        """Evaluate a cubic Bézier curve at parameter t.

        Args:
            t: Parameter from 0 to 1
            p0: Start point
            p1: First control point
            p2: Second control point
            p3: End point

        Returns:
            (x, y) point on curve
        """
        u = 1 - t
        # Bernstein basis weights for the four control points.
        w0 = u ** 3
        w1 = 3 * u ** 2 * t
        w2 = 3 * u * t ** 2
        w3 = t ** 3
        x = w0 * p0[0] + w1 * p1[0] + w2 * p2[0] + w3 * p3[0]
        y = w0 * p0[1] + w1 * p1[1] + w2 * p2[1] + w3 * p3[1]
        return (x, y)

    @staticmethod
    def generate_control_points(start: Tuple[int, int], end: Tuple[int, int],
                                randomness: float = 0.3) -> Tuple[Tuple[float, float], Tuple[float, float]]:
        """Generate randomized control points for a natural-looking curve.

        Args:
            start: Starting position
            end: Ending position
            randomness: Amount of randomness (0.0 to 1.0)

        Returns:
            Tuple of two control points
        """
        delta_x = end[0] - start[0]
        delta_y = end[1] - start[1]
        span = math.sqrt(delta_x * delta_x + delta_y * delta_y)
        # How far the control points may stray from the straight line.
        stray = span * randomness * random.uniform(0.2, 0.8)
        theta_a = random.uniform(-math.pi, math.pi)
        theta_b = random.uniform(-math.pi, math.pi)
        # Control points sit 1/4 and 3/4 of the way along, pushed sideways.
        first = (start[0] + delta_x * 0.25 + math.cos(theta_a) * stray,
                 start[1] + delta_y * 0.25 + math.sin(theta_a) * stray)
        second = (start[0] + delta_x * 0.75 + math.cos(theta_b) * stray,
                  start[1] + delta_y * 0.75 + math.sin(theta_b) * stray)
        return (first, second)
class MouseController:
    """Controller for human-like mouse interactions.

    Movement follows randomized Bézier paths with Fitts'-law-inspired
    durations; clicks get small positional jitter and pre/post delays.
    """

    def __init__(self):
        """Initialize mouse controller from the current cursor position."""
        self.current_pos = pyautogui.position()
        self.movement_speed = 1.0  # Multiplier for movement speed
        self.click_variance = 3  # Pixel variance for click positions
        # Movement timing parameters
        self.min_duration = 0.1  # Minimum movement time
        self.max_duration = 1.5  # Maximum movement time
        self.base_speed = 1000  # Base pixels per second

    def get_current_position(self) -> Tuple[int, int]:
        """Get current mouse position (refreshed from pyautogui).

        Returns:
            (x, y) tuple of current position
        """
        self.current_pos = pyautogui.position()
        return self.current_pos

    def calculate_movement_duration(self, start: Tuple[int, int],
                                    end: Tuple[int, int]) -> float:
        """Calculate realistic movement duration based on distance.

        Args:
            start: Starting position
            end: Ending position

        Returns:
            Movement duration in seconds, clamped to
            [min_duration, max_duration].
        """
        dx = end[0] - start[0]
        dy = end[1] - start[1]
        distance = math.sqrt(dx*dx + dy*dy)
        # Fitts' Law inspired calculation:
        # time grows linearly with distance plus a logarithmic term.
        base_time = distance / (self.base_speed * self.movement_speed)
        fitts_factor = math.log2(1 + distance / 10) / 10
        duration = base_time + fitts_factor
        # Add some randomness
        duration *= random.uniform(0.8, 1.2)
        # Clamp to reasonable bounds
        return max(self.min_duration, min(self.max_duration, duration))

    def generate_movement_path(self, start: Tuple[int, int], end: Tuple[int, int],
                               duration: Optional[float] = None,
                               steps: Optional[int] = None) -> MousePath:
        """Generate a Bézier curve path for mouse movement.

        Args:
            start: Starting position
            end: Ending position
            duration: Movement duration, or None to calculate
            steps: Number of steps, or None to calculate

        Returns:
            MousePath with points and timing; total_duration equals the
            sum of the per-step delays.
        """
        if duration is None:
            duration = self.calculate_movement_duration(start, end)
        if steps is None:
            # Calculate steps based on distance and duration
            distance = math.sqrt((end[0] - start[0])**2 + (end[1] - start[1])**2)
            steps = max(10, int(distance / 10))  # Roughly 10 pixels per step
        # Generate control points
        cp1, cp2 = BezierCurve.generate_control_points(start, end)
        # Generate path points
        points = []
        delays = []
        for i in range(steps + 1):
            t = i / steps
            # Use ease-in-out curve for timing
            timing_t = self._ease_in_out(t)
            # Calculate position on Bézier curve
            x, y = BezierCurve.cubic_bezier(timing_t, start, cp1, cp2, end)
            points.append((int(x), int(y)))
            # Calculate delay for this step
            if i < steps:
                delay = duration / steps
                # Add small random variation
                delay *= random.uniform(0.8, 1.2)
                delays.append(delay)
        # Fix: per-step delays are randomized, so report the duration the
        # path will actually take instead of the nominal `duration`, which
        # drifted from sum(delays).
        return MousePath(points, delays, sum(delays))

    def move_to(self, target: Tuple[int, int], duration: Optional[float] = None) -> None:
        """Move mouse to target position using a Bézier curve.

        Args:
            target: Target (x, y) position
            duration: Movement duration, or None to calculate
        """
        start = self.get_current_position()
        path = self.generate_movement_path(start, target, duration)
        logger.debug(f"Moving mouse from {start} to {target} in {path.total_duration:.2f}s")
        for i, point in enumerate(path.points[1:], 1):
            pyautogui.moveTo(point[0], point[1], duration=0)
            if i <= len(path.delays):
                time.sleep(path.delays[i-1])
        self.current_pos = target

    def click(self, position: Optional[Tuple[int, int]] = None,
              button: str = 'left', move_first: bool = True) -> None:
        """Click at specified position with human-like variation.

        Args:
            position: Click position, or None for current position
            button: Mouse button ('left', 'right', 'middle')
            move_first: Whether to move to position first
        """
        if position is None:
            position = self.get_current_position()
        else:
            # Add small random offset for more human-like clicking
            offset_x = random.randint(-self.click_variance, self.click_variance)
            offset_y = random.randint(-self.click_variance, self.click_variance)
            position = (position[0] + offset_x, position[1] + offset_y)
        if move_first and position != self.get_current_position():
            self.move_to(position)
        # Random pre-click delay
        time.sleep(random.uniform(0.01, 0.05))
        logger.debug(f"Clicking {button} button at {position}")
        pyautogui.click(position[0], position[1], button=button)
        # Random post-click delay
        time.sleep(random.uniform(0.01, 0.08))

    def double_click(self, position: Optional[Tuple[int, int]] = None,
                     move_first: bool = True) -> None:
        """Double-click at specified position (no jitter applied).

        Args:
            position: Click position, or None for current position
            move_first: Whether to move to position first
        """
        if position is None:
            position = self.get_current_position()
        if move_first and position != self.get_current_position():
            self.move_to(position)
        # Random delay before double-click
        time.sleep(random.uniform(0.01, 0.05))
        logger.debug(f"Double-clicking at {position}")
        pyautogui.doubleClick(position[0], position[1])
        # Random delay after double-click
        time.sleep(random.uniform(0.05, 0.1))

    def drag(self, start: Tuple[int, int], end: Tuple[int, int],
             button: str = 'left', duration: Optional[float] = None) -> None:
        """Drag from start to end position.

        Args:
            start: Starting position
            end: Ending position
            button: Mouse button to drag with
            duration: Drag duration, or None to calculate
        """
        # Move to start position
        self.move_to(start)
        # Mouse down
        time.sleep(random.uniform(0.01, 0.03))
        pyautogui.mouseDown(start[0], start[1], button=button)
        # Wait briefly before starting drag
        time.sleep(random.uniform(0.05, 0.1))
        # Generate drag path
        path = self.generate_movement_path(start, end, duration)
        logger.debug(f"Dragging from {start} to {end}")
        # Execute drag movement
        for i, point in enumerate(path.points[1:], 1):
            pyautogui.moveTo(point[0], point[1], duration=0)
            if i <= len(path.delays):
                time.sleep(path.delays[i-1])
        # Mouse up
        time.sleep(random.uniform(0.01, 0.03))
        pyautogui.mouseUp(end[0], end[1], button=button)
        self.current_pos = end

    def scroll(self, clicks: int, position: Optional[Tuple[int, int]] = None) -> None:
        """Scroll at specified position.

        Args:
            clicks: Number of scroll clicks (positive = up, negative = down)
            position: Scroll position, or None for current position
        """
        if position is not None and position != self.get_current_position():
            self.move_to(position)
        # Random delay before scrolling
        time.sleep(random.uniform(0.05, 0.15))
        # Scroll with small delays between clicks for more human-like behavior
        for i in range(abs(clicks)):
            scroll_direction = 1 if clicks > 0 else -1
            pyautogui.scroll(scroll_direction)
            if i < abs(clicks) - 1:  # Don't delay after last scroll
                time.sleep(random.uniform(0.02, 0.08))

    def _ease_in_out(self, t: float) -> float:
        """Smoothstep easing for acceleration/deceleration.

        Args:
            t: Input parameter (0 to 1)

        Returns:
            Eased parameter (0 to 1)
        """
        return t * t * (3.0 - 2.0 * t)

    def set_movement_speed(self, speed: float) -> None:
        """Set movement speed multiplier, clamped to [0.1, 5.0].

        Args:
            speed: Speed multiplier (1.0 = normal, 2.0 = double speed, etc.)
        """
        self.movement_speed = max(0.1, min(5.0, speed))
        logger.info(f"Mouse movement speed set to {self.movement_speed}x")

    def set_click_variance(self, variance: int) -> None:
        """Set click position variance in pixels, clamped to [0, 10].

        Args:
            variance: Maximum pixel offset for clicks
        """
        self.click_variance = max(0, min(10, variance))
        logger.info(f"Click variance set to {self.click_variance} pixels")

View file

View file

@ -0,0 +1,46 @@
"""Character movement control for isometric games.
Handles click-to-move navigation with human-like patterns.
"""
from typing import Tuple, Optional
import logging
import time
import numpy as np
from engine.input.mouse import MouseController
from engine.input.humanize import Humanizer
from engine.navigation.pathfinder import Waypoint, WaypointGraph
logger = logging.getLogger(__name__)
class MovementController:
    """Controls character movement via click-to-move.

    Translates target screen coordinates into humanized mouse actions and
    walks waypoint paths found in the navigation graph.
    """

    def __init__(self, mouse: MouseController, humanizer: Humanizer):
        """Args:
            mouse: Low-level mouse controller used to move and click.
            humanizer: Source of click jitter and human-like delays.
        """
        self.mouse = mouse
        self.humanizer = humanizer
        self.waypoints = WaypointGraph()

    def click_to_move(self, x: int, y: int) -> None:
        """Click a screen position to move there."""
        jx, jy = self.humanizer.jitter_position(x, y)
        # Fix: MouseController.move_to takes a single (x, y) tuple; passing
        # two ints made jx the target (TypeError on indexing) and jy the
        # movement duration.
        self.mouse.move_to((jx, jy))
        self.humanizer.wait()
        self.mouse.click()

    def navigate_waypoints(self, start: str, goal: str) -> bool:
        """Navigate between named waypoints.

        Returns:
            True if a path existed and was walked, False otherwise.
        """
        path = self.waypoints.find_path(start, goal)
        if not path:
            logger.warning(f"No path from {start} to {goal}")
            return False
        for waypoint in path[1:]:  # Skip start
            self.click_to_move(waypoint.screen_x, waypoint.screen_y)
            # Wait for movement (game-specific timing)
            time.sleep(self.humanizer.reaction_delay() + 0.5)
        return True

View file

@ -0,0 +1,78 @@
"""Pathfinding for isometric game navigation.
Implements A* and click-to-move navigation for isometric games
where the bot needs to move between known locations.
"""
from typing import List, Tuple, Optional, Dict
from dataclasses import dataclass
import heapq
import math
import logging
logger = logging.getLogger(__name__)
@dataclass
class Waypoint:
    """A named location in the game world."""
    # Unique name used as the node key in WaypointGraph.
    name: str
    # On-screen click coordinates for this location.
    screen_x: int
    screen_y: int
    # Optional extra data; annotation fixed to Optional since the default
    # is None (a mutable {} default is not allowed on a dataclass field).
    # Callers must handle metadata being None.
    metadata: Optional[Dict] = None
class WaypointGraph:
    """Graph of connected waypoints for navigation."""

    def __init__(self):
        # Node name -> Waypoint, and node name -> neighbor names.
        self._waypoints: Dict[str, Waypoint] = {}
        self._edges: Dict[str, List[str]] = {}

    def add_waypoint(self, waypoint: Waypoint) -> None:
        """Register a waypoint node with an (initially empty) adjacency list."""
        name = waypoint.name
        self._waypoints[name] = waypoint
        self._edges.setdefault(name, [])

    def connect(self, name_a: str, name_b: str, bidirectional: bool = True) -> None:
        """Add an edge from name_a to name_b (and the reverse by default)."""
        self._edges.setdefault(name_a, []).append(name_b)
        if bidirectional:
            self._edges.setdefault(name_b, []).append(name_a)

    def find_path(self, start: str, goal: str) -> Optional[List[Waypoint]]:
        """A* pathfinding between waypoints.

        Uses straight-line screen distance both as edge cost and as the
        heuristic. Returns the waypoint list from start to goal, or None
        if either node is unknown or no path exists.
        """
        if start not in self._waypoints or goal not in self._waypoints:
            return None
        target = self._waypoints[goal]

        def h(name: str) -> float:
            node = self._waypoints[name]
            return math.hypot(node.screen_x - target.screen_x,
                              node.screen_y - target.screen_y)

        # Heap entries are (f, g, name) so ties break on cheaper g first.
        frontier = [(h(start), 0, start)]
        parents: Dict[str, str] = {}
        best_g: Dict[str, float] = {start: 0}
        while frontier:
            _, _, current = heapq.heappop(frontier)
            if current == goal:
                # Walk the parent chain back to start, then reverse it.
                chain = [self._waypoints[current]]
                while current in parents:
                    current = parents[current]
                    chain.append(self._waypoints[current])
                chain.reverse()
                return chain
            cur_node = self._waypoints[current]
            for nxt in self._edges.get(current, []):
                nxt_node = self._waypoints[nxt]
                step = math.hypot(nxt_node.screen_x - cur_node.screen_x,
                                  nxt_node.screen_y - cur_node.screen_y)
                candidate = best_g[current] + step
                if candidate < best_g.get(nxt, float('inf')):
                    parents[nxt] = current
                    best_g[nxt] = candidate
                    heapq.heappush(frontier, (candidate + h(nxt), candidate, nxt))
        return None

View file

73
engine/safety/patterns.py Normal file
View file

@ -0,0 +1,73 @@
"""Behavioral pattern randomization for anti-detection.
Varies bot behavior to avoid detectable patterns like identical
farming routes, consistent timing, or perfect execution.
"""
import random
import logging
from typing import List, Tuple, Callable, Any
logger = logging.getLogger(__name__)
class RouteRandomizer:
    """Randomizes farming routes and action sequences."""

    def __init__(self, variation_factor: float = 0.15):
        # Fraction by which a waypoint coordinate may drift.
        self.variation_factor = variation_factor

    def shuffle_optional_steps(
        self, steps: List[Any], required_indices: List[int] = None,
    ) -> List[Any]:
        """Shuffle non-required steps while keeping required ones in order."""
        pinned = set(required_indices or [])
        movable = [step for idx, step in enumerate(steps) if idx not in pinned]
        random.shuffle(movable)
        feed = iter(movable)
        # Required steps keep their slots; shuffled steps fill the rest.
        return [
            steps[idx] if idx in pinned else next(feed)
            for idx in range(len(steps))
        ]

    def vary_route(
        self, waypoints: List[Tuple[int, int]],
    ) -> List[Tuple[int, int]]:
        """Add slight variations to a route's waypoints.

        Each coordinate is nudged by up to variation_factor of its own value.
        """
        factor = self.variation_factor
        return [
            (
                x + int(x * factor * random.uniform(-1, 1)),
                y + int(y * factor * random.uniform(-1, 1)),
            )
            for x, y in waypoints
        ]

    def should_skip_optional(self, skip_chance: float = 0.1) -> bool:
        """Randomly decide to skip an optional action."""
        return random.random() < skip_chance
class ActionVariator:
    """Varies how actions are performed."""

    @staticmethod
    def vary_count(target: int, variance: int = 1) -> int:
        """Vary a repeat count (e.g., click 2-4 times instead of always 3).

        Never returns less than 1.
        """
        delta = random.randint(-variance, variance)
        return max(1, target + delta)

    @staticmethod
    def random_order(actions: List[Callable]) -> List[Callable]:
        """Return the actions in a random order without mutating the input."""
        reordered = list(actions)
        random.shuffle(reordered)
        return reordered

68
engine/safety/timing.py Normal file
View file

@ -0,0 +1,68 @@
"""Anti-detection timing and break scheduling.
Manages play sessions with realistic timing patterns to avoid
behavioral detection systems.
"""
import random
import time
import logging
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)
@dataclass
class SessionSchedule:
    """Defines a play session schedule.

    All (min, max) pairs are sampled uniformly by SessionTimer.
    """
    # Length of a single continuous session, in hours.
    min_session_hours: float = 1.0
    max_session_hours: float = 4.0
    # Pause between sessions, in minutes.
    min_break_minutes: float = 10.0
    max_break_minutes: float = 45.0
    # Hard cap on total playtime per day, in hours.
    max_daily_hours: float = 12.0
class SessionTimer:
    """Manages bot session timing to mimic human play patterns."""

    def __init__(self, schedule: Optional[SessionSchedule] = None):
        """Initialize with a schedule (defaults to SessionSchedule()).

        Args:
            schedule: Session timing parameters, or None for defaults.
        """
        self.schedule = schedule or SessionSchedule()
        self._session_start = time.time()
        # Seconds of completed-session playtime accumulated today.
        self._daily_playtime = 0.0
        self._day_start = time.time()
        self._target_duration = self._roll_session_duration()

    def _roll_session_duration(self) -> float:
        """Generate random session duration in seconds."""
        hours = random.uniform(
            self.schedule.min_session_hours,
            self.schedule.max_session_hours,
        )
        return hours * 3600

    def _roll_over_day(self) -> None:
        """Reset the daily playtime counter once 24h have elapsed.

        Fix: _day_start was recorded but never used, so hitting the daily
        cap locked the bot out permanently instead of only for the rest of
        the day.
        """
        now = time.time()
        if now - self._day_start >= 86400.0:
            self._day_start = now
            self._daily_playtime = 0.0

    def session_elapsed(self) -> float:
        """Seconds elapsed in current session."""
        return time.time() - self._session_start

    def should_stop_session(self) -> bool:
        """Check if current session should end (target reached or daily cap)."""
        self._roll_over_day()
        if self.session_elapsed() >= self._target_duration:
            return True
        if self._daily_playtime + self.session_elapsed() >= self.schedule.max_daily_hours * 3600:
            return True
        return False

    def get_break_duration(self) -> float:
        """Get randomized break duration in seconds."""
        return random.uniform(
            self.schedule.min_break_minutes * 60,
            self.schedule.max_break_minutes * 60,
        )

    def start_new_session(self) -> None:
        """Start a new play session after a break."""
        self._daily_playtime += self.session_elapsed()
        self._roll_over_day()
        self._session_start = time.time()
        self._target_duration = self._roll_session_duration()
        logger.info(f"New session: {self._target_duration/3600:.1f}h target")

23
engine/screen/__init__.py Normal file
View file

@ -0,0 +1,23 @@
"""Screen reading components for visual game state detection.
This module provides tools for capturing, analyzing, and extracting information
from game screenshots without requiring memory access or game modification.
Components:
- capture: Screenshot capture using various backends
- ocr: Optical Character Recognition for text extraction
- template: Template matching for UI element detection
"""
from .capture import ScreenCapture, ScreenRegion
from .ocr import OCREngine, TextDetector
from .template import TemplateManager, TemplateMatcher
# Public API of the screen package; governs `from engine.screen import *`.
__all__ = [
    "ScreenCapture",
    "ScreenRegion",
    "OCREngine",
    "TextDetector",
    "TemplateManager",
    "TemplateMatcher",
]

220
engine/screen/capture.py Normal file
View file

@ -0,0 +1,220 @@
"""Screen capture utilities for taking game screenshots.
Provides efficient screenshot capture using multiple backends (mss, PIL)
with support for specific regions and window targeting.
"""
from typing import Tuple, Optional, Dict, Any
from dataclasses import dataclass
import time
import logging
import numpy as np
from PIL import Image, ImageGrab
import mss
import cv2
logger = logging.getLogger(__name__)
@dataclass
class ScreenRegion:
    """Axis-aligned rectangle describing a screen area to capture."""
    x: int       # left edge, pixels
    y: int       # top edge, pixels
    width: int   # horizontal extent, pixels
    height: int  # vertical extent, pixels

    @property
    def bounds(self) -> Tuple[int, int, int, int]:
        """Rectangle as a PIL-style (left, top, right, bottom) tuple."""
        left, top = self.x, self.y
        return (left, top, left + self.width, top + self.height)

    @property
    def mss_bounds(self) -> Dict[str, int]:
        """Rectangle as the mapping consumed by mss' grab()."""
        return dict(top=self.y, left=self.x, width=self.width, height=self.height)
class ScreenCapture:
    """High-performance screen capture with multiple backends.

    Prefers mss for speed; silently degrades to PIL's ImageGrab when mss
    fails to initialize. All captures are returned as BGR numpy arrays.
    """
    def __init__(self, backend: str = "mss", monitor: int = 1):
        """Initialize screen capture.
        Args:
            backend: Capture backend ("mss" or "pil")
            monitor: Monitor number to capture from (1-indexed)
        """
        self.backend = backend
        self.monitor = monitor
        # Populated by _initialize_mss(); stay None for the PIL backend.
        self._mss_instance: Optional[mss.mss] = None
        self._monitor_info: Optional[Dict[str, int]] = None
        if backend == "mss":
            self._initialize_mss()
    def _initialize_mss(self) -> None:
        """Initialize MSS backend; falls back to PIL on any failure."""
        try:
            self._mss_instance = mss.mss()
            monitors = self._mss_instance.monitors
            # mss monitors[0] is the combined virtual screen; physical
            # monitors are 1-indexed, matching self.monitor.
            if self.monitor >= len(monitors):
                logger.warning(f"Monitor {self.monitor} not found, using primary")
                self.monitor = 1
            self._monitor_info = monitors[self.monitor]
            logger.info(f"Initialized MSS capture for monitor {self.monitor}: "
                        f"{self._monitor_info['width']}x{self._monitor_info['height']}")
        except Exception as e:
            logger.error(f"Failed to initialize MSS: {e}")
            # Degrade gracefully instead of crashing the bot at startup.
            self.backend = "pil"
    def capture_screen(self, region: Optional[ScreenRegion] = None) -> np.ndarray:
        """Capture screenshot of screen or region.
        Args:
            region: Specific region to capture, or None for full screen
        Returns:
            Screenshot as numpy array in BGR format (for OpenCV compatibility)
        """
        try:
            if self.backend == "mss":
                return self._capture_mss(region)
            else:
                return self._capture_pil(region)
        except Exception as e:
            logger.error(f"Screen capture failed: {e}")
            # Fallback to an empty (black) image so callers never get None.
            return np.zeros((100, 100, 3), dtype=np.uint8)
    def _capture_mss(self, region: Optional[ScreenRegion]) -> np.ndarray:
        """Capture using MSS backend."""
        if not self._mss_instance:
            raise RuntimeError("MSS not initialized")
        if region:
            monitor = region.mss_bounds
        else:
            monitor = self._monitor_info or self._mss_instance.monitors[self.monitor]
        # ScreenShot.rgb exposes the grab as packed RGB bytes (mss captures
        # natively in BGRA and converts for this property).
        screenshot = self._mss_instance.grab(monitor)
        img_array = np.frombuffer(screenshot.rgb, dtype=np.uint8)
        img_array = img_array.reshape((screenshot.height, screenshot.width, 3))
        # Convert RGB to BGR for OpenCV
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    def _capture_pil(self, region: Optional[ScreenRegion]) -> np.ndarray:
        """Capture using PIL backend."""
        if region:
            bbox = region.bounds
        else:
            bbox = None
        # PIL returns RGB format
        screenshot = ImageGrab.grab(bbox=bbox)
        img_array = np.array(screenshot)
        # Convert RGB to BGR for OpenCV
        return cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    def save_screenshot(self, filename: str, region: Optional[ScreenRegion] = None) -> bool:
        """Save screenshot to file.
        Args:
            filename: Output filename
            region: Region to capture, or None for full screen
        Returns:
            True if successful, False otherwise
        """
        try:
            img = self.capture_screen(region)
            # cv2.imwrite expects BGR, which capture_screen already provides.
            return cv2.imwrite(filename, img)
        except Exception as e:
            logger.error(f"Failed to save screenshot: {e}")
            return False
    def get_screen_size(self) -> Tuple[int, int]:
        """Get screen dimensions.
        Returns:
            (width, height) tuple
        """
        if self.backend == "mss" and self._monitor_info:
            return (self._monitor_info["width"], self._monitor_info["height"])
        else:
            # Use PIL as fallback (takes a throwaway full-screen grab).
            screenshot = ImageGrab.grab()
            return screenshot.size
    def find_window(self, window_title: str) -> Optional[ScreenRegion]:
        """Find window by title and return its region.
        Args:
            window_title: Partial or full window title to search for
        Returns:
            ScreenRegion if window found, None otherwise
        Note:
            This is a placeholder - actual implementation would use
            platform-specific window enumeration (e.g., Windows API, X11)
        """
        # TODO: Implement window finding
        logger.warning("Window finding not implemented yet")
        return None
    def benchmark_capture(self, iterations: int = 100) -> Dict[str, float]:
        """Benchmark capture performance.
        Args:
            iterations: Number of captures to perform
        Returns:
            Performance statistics
            # NOTE(review): dict also carries the backend name (a str),
            # so the Dict[str, float] annotation is looser in practice.
        """
        logger.info(f"Benchmarking {self.backend} backend ({iterations} iterations)")
        start_time = time.perf_counter()
        for _ in range(iterations):
            self.capture_screen()
        end_time = time.perf_counter()
        total_time = end_time - start_time
        avg_time = total_time / iterations
        fps = iterations / total_time
        stats = {
            "backend": self.backend,
            "iterations": iterations,
            "total_time": total_time,
            "avg_time_ms": avg_time * 1000,
            "fps": fps,
        }
        logger.info(f"Benchmark results: {avg_time*1000:.2f}ms avg, {fps:.1f} FPS")
        return stats
    def __enter__(self):
        """Context manager entry."""
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: release the mss screen handle if open."""
        if self._mss_instance:
            self._mss_instance.close()

346
engine/screen/ocr.py Normal file
View file

@ -0,0 +1,346 @@
"""OCR (Optical Character Recognition) for extracting text from screenshots.
Provides text detection and extraction capabilities using pytesseract
with preprocessing for better accuracy in game environments.
"""
from typing import List, Dict, Optional, Tuple, NamedTuple
import logging
import re
import cv2
import numpy as np
import pytesseract
from PIL import Image
logger = logging.getLogger(__name__)
class TextMatch(NamedTuple):
    """Represents detected text with position and confidence."""
    text: str  # recognized string, whitespace-stripped
    confidence: float  # tesseract word confidence (typically 0-100 scale)
    bbox: Tuple[int, int, int, int]  # (x, y, width, height) in image pixels
class OCRConfig:
    """Tunable settings for OCR extraction and image preprocessing."""

    def __init__(self):
        # --- Tesseract engine options ---
        self.tesseract_config = "--oem 3 --psm 6"  # default engine, uniform text block
        self.language = "eng"
        self.min_confidence = 30.0  # drop words tesseract scores below this

        # --- Preprocessing pipeline toggles ---
        self.preprocess = True
        self.scale_factor = 2.0  # upscale before OCR to help with small game fonts
        self.denoise = True
        self.contrast_enhance = True  # CLAHE contrast equalization

        # --- Post-OCR text filtering ---
        self.min_text_length = 1
        # Whitelist patterns: text must match at least one to be kept
        # (alphanumeric plus common punctuation).
        self.filter_patterns = [r'^[a-zA-Z0-9\s\-_:.,/]+$']
class OCREngine:
    """OCR engine for text extraction from game screenshots.

    Wraps pytesseract with an optional preprocessing pipeline (grayscale,
    upscale, denoise, CLAHE) tuned via OCRConfig.
    """
    def __init__(self, config: Optional[OCRConfig] = None):
        """Initialize OCR engine.
        Args:
            config: OCR configuration, or None for defaults
        Raises:
            RuntimeError: if the tesseract binary is unavailable.
        """
        self.config = config or OCRConfig()
        self._verify_tesseract()
    def _verify_tesseract(self) -> None:
        """Verify tesseract installation."""
        try:
            pytesseract.get_tesseract_version()
            logger.info("Tesseract initialized successfully")
        except Exception as e:
            logger.error(f"Tesseract not found or not working: {e}")
            # Fail fast: nothing in this class works without the binary.
            raise RuntimeError("Tesseract OCR is required but not available")
    def extract_text(self, image: np.ndarray, region: Optional[Tuple[int, int, int, int]] = None) -> str:
        """Extract all text from image.
        Args:
            image: Input image as numpy array (BGR)
            region: Optional (x, y, width, height) region to process
        Returns:
            Extracted text as string (empty string on OCR failure)
        """
        processed_img = self._preprocess_image(image, region)
        try:
            text = pytesseract.image_to_string(
                processed_img,
                lang=self.config.language,
                config=self.config.tesseract_config
            )
            return self._clean_text(text)
        except Exception as e:
            logger.error(f"OCR extraction failed: {e}")
            return ""
    def find_text(self, image: np.ndarray, search_text: str,
                  case_sensitive: bool = False) -> List[TextMatch]:
        """Find specific text in image with positions.
        Args:
            image: Input image as numpy array
            search_text: Text to search for (substring match per OCR word)
            case_sensitive: Whether search should be case sensitive
        Returns:
            List of TextMatch objects for found text
            # NOTE(review): bboxes are in *preprocessed* (possibly scaled)
            # coordinates when scale_factor > 1 — confirm before clicking.
        """
        processed_img = self._preprocess_image(image)
        try:
            # Get detailed OCR data
            data = pytesseract.image_to_data(
                processed_img,
                lang=self.config.language,
                config=self.config.tesseract_config,
                output_type=pytesseract.Output.DICT
            )
            matches = []
            # NOTE(review): despite the name, this holds the original-case
            # needle when case_sensitive is True.
            search_lower = search_text.lower() if not case_sensitive else search_text
            for i in range(len(data['text'])):
                text = data['text'][i].strip()
                confidence = float(data['conf'][i])
                # Low-confidence rows (including tesseract's -1 structural
                # rows, presumably) are skipped here.
                if confidence < self.config.min_confidence:
                    continue
                text_to_match = text.lower() if not case_sensitive else text
                if search_lower in text_to_match:
                    bbox = (
                        data['left'][i],
                        data['top'][i],
                        data['width'][i],
                        data['height'][i]
                    )
                    matches.append(TextMatch(text, confidence, bbox))
            return matches
        except Exception as e:
            logger.error(f"Text search failed: {e}")
            return []
    def get_text_regions(self, image: np.ndarray) -> List[TextMatch]:
        """Get all text regions with positions and confidence.
        Args:
            image: Input image as numpy array
        Returns:
            List of TextMatch objects for all detected text that passes
            the confidence, length, and pattern filters.
        """
        processed_img = self._preprocess_image(image)
        try:
            data = pytesseract.image_to_data(
                processed_img,
                lang=self.config.language,
                config=self.config.tesseract_config,
                output_type=pytesseract.Output.DICT
            )
            text_regions = []
            for i in range(len(data['text'])):
                text = data['text'][i].strip()
                confidence = float(data['conf'][i])
                if (confidence < self.config.min_confidence or
                    len(text) < self.config.min_text_length):
                    continue
                if not self._passes_text_filters(text):
                    continue
                bbox = (
                    data['left'][i],
                    data['top'][i],
                    data['width'][i],
                    data['height'][i]
                )
                text_regions.append(TextMatch(text, confidence, bbox))
            return text_regions
        except Exception as e:
            logger.error(f"Text region detection failed: {e}")
            return []
    def _preprocess_image(self, image: np.ndarray,
                          region: Optional[Tuple[int, int, int, int]] = None) -> Image.Image:
        """Preprocess image for better OCR accuracy.
        Args:
            image: Input image as numpy array (BGR)
            region: Optional region to extract
        Returns:
            Preprocessed PIL Image (grayscale unless preprocessing disabled)
        """
        # Extract region if specified
        if region:
            x, y, w, h = region
            image = image[y:y+h, x:x+w]
        if not self.config.preprocess:
            return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Scale up for better OCR
        if self.config.scale_factor > 1.0:
            height, width = gray.shape
            new_width = int(width * self.config.scale_factor)
            new_height = int(height * self.config.scale_factor)
            gray = cv2.resize(gray, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
        # Denoise
        if self.config.denoise:
            gray = cv2.fastNlMeansDenoising(gray)
        # Enhance contrast
        if self.config.contrast_enhance:
            # Use CLAHE (Contrast Limited Adaptive Histogram Equalization)
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            gray = clahe.apply(gray)
        # Convert back to PIL Image
        return Image.fromarray(gray)
    def _clean_text(self, text: str) -> str:
        """Clean extracted text.
        Args:
            text: Raw extracted text
        Returns:
            Cleaned text (whitespace-collapsed, common artifacts removed)
        """
        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text.strip())
        # Remove common OCR artifacts
        text = re.sub(r'[|¦]', 'I', text)  # Vertical bars to I
        text = re.sub(r'[{}]', '', text)  # Remove braces
        return text
    def _passes_text_filters(self, text: str) -> bool:
        """Check if text passes configured filters.
        Args:
            text: Text to check
        Returns:
            True if text matches at least one whitelist pattern, or no
            patterns are configured at all.
        """
        if not self.config.filter_patterns:
            return True
        for pattern in self.config.filter_patterns:
            if re.match(pattern, text):
                return True
        return False
class TextDetector:
    """High-level text detection interface built on OCREngine."""
    def __init__(self, ocr_config: Optional[OCRConfig] = None):
        """Initialize text detector.
        Args:
            ocr_config: OCR configuration
        """
        self.ocr = OCREngine(ocr_config)
        # NOTE(review): declared but never read or written anywhere in this
        # class yet — either wire it up or remove it.
        self.text_cache: Dict[str, List[TextMatch]] = {}
    def contains_text(self, image: np.ndarray, text: str,
                      case_sensitive: bool = False) -> bool:
        """Check if image contains specific text.
        Args:
            image: Input image
            text: Text to search for
            case_sensitive: Case sensitive search
        Returns:
            True if text found
        """
        matches = self.ocr.find_text(image, text, case_sensitive)
        return len(matches) > 0
    def wait_for_text(self, capture_func, text: str, timeout: float = 10.0,
                      check_interval: float = 0.5) -> bool:
        """Wait for specific text to appear on screen.
        Args:
            capture_func: Function that returns screenshot
            text: Text to wait for (case-insensitive; contains_text default)
            timeout: Maximum wait time in seconds
            check_interval: Time between checks in seconds
        Returns:
            True if text appeared, False if timeout
        """
        # Local import: `time` is not imported at this module's top level.
        import time
        start_time = time.time()
        while time.time() - start_time < timeout:
            image = capture_func()
            if self.contains_text(image, text):
                return True
            time.sleep(check_interval)
        return False
    def get_ui_text(self, image: np.ndarray) -> Dict[str, str]:
        """Extract common UI text elements.
        Args:
            image: Input image
        Returns:
            Dictionary mapping UI elements to text
        """
        # This is a placeholder for game-specific UI text extraction
        # In practice, this would define regions for health, mana, inventory, etc.
        text_regions = self.ocr.get_text_regions(image)
        ui_text = {}
        for region in text_regions:
            # Categorize text based on position or pattern
            if "health" in region.text.lower():
                ui_text["health"] = region.text
            elif "mana" in region.text.lower():
                ui_text["mana"] = region.text
            # Add more UI element detection
        return ui_text

403
engine/screen/template.py Normal file
View file

@ -0,0 +1,403 @@
"""Template matching for UI element detection in game screenshots.
Provides efficient template matching using OpenCV with support for
multiple templates, confidence thresholds, and template management.
"""
from typing import List, Dict, Optional, Tuple, NamedTuple
from pathlib import Path
import logging
from dataclasses import dataclass
import cv2
import numpy as np
logger = logging.getLogger(__name__)
class TemplateMatch(NamedTuple):
    """Represents a template match with position and confidence."""
    template_name: str  # name of the matched template ("" until assigned)
    confidence: float  # matching score from cv2.matchTemplate
    center: Tuple[int, int]  # (x, y) center position
    bbox: Tuple[int, int, int, int]  # (x, y, width, height)
@dataclass
class TemplateInfo:
    """Information about a loaded template."""
    name: str  # identifier used for lookups
    image: np.ndarray  # template pixels as loaded by cv2.imread (BGR)
    width: int  # template width in pixels
    height: int  # template height in pixels
    path: Optional[str] = None  # source file path, if loaded from disk
class TemplateMatcher:
    """Core template matching functionality."""
    def __init__(self, method: int = cv2.TM_CCOEFF_NORMED,
                 threshold: float = 0.8):
        """Initialize template matcher.
        Args:
            method: OpenCV template matching method
            threshold: Minimum confidence threshold (0.0 to 1.0)
        """
        self.method = method
        self.threshold = threshold
    def match_template(self, image: np.ndarray, template: np.ndarray,
                       threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Match single template in image.
        Args:
            image: Source image to search in
            template: Template image to find
            threshold: Confidence threshold override
        Returns:
            List of matches found (template_name is left empty here;
            TemplateManager fills it in)
        """
        if threshold is None:
            threshold = self.threshold
        # Convert to grayscale if needed
        if len(image.shape) == 3:
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            image_gray = image
        if len(template.shape) == 3:
            template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
        else:
            template_gray = template
        # Perform template matching
        result = cv2.matchTemplate(image_gray, template_gray, self.method)
        # Find matches above threshold
        # NOTE(review): assumes a "higher is better" method such as
        # TM_CCOEFF_NORMED; TM_SQDIFF* would need this comparison inverted.
        locations = np.where(result >= threshold)
        matches = []
        template_h, template_w = template_gray.shape
        for pt in zip(*locations[::-1]):  # Switch x and y
            x, y = pt
            confidence = result[y, x]
            center = (x + template_w // 2, y + template_h // 2)
            bbox = (x, y, template_w, template_h)
            matches.append(TemplateMatch("", confidence, center, bbox))
        # Remove overlapping matches (Non-Maximum Suppression)
        matches = self._apply_nms(matches, overlap_threshold=0.3)
        return matches
    def match_multiple_scales(self, image: np.ndarray, template: np.ndarray,
                              scales: Optional[List[float]] = None,
                              threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Match template at multiple scales.
        Args:
            image: Source image
            template: Template image
            scales: List of scale factors to try (defaults to 0.8-1.2)
            threshold: Confidence threshold
        Returns:
            List of matches at all scales
        """
        if scales is None:
            scales = [0.8, 0.9, 1.0, 1.1, 1.2]
        all_matches = []
        for scale in scales:
            # Scale template
            new_width = int(template.shape[1] * scale)
            new_height = int(template.shape[0] * scale)
            if new_width < 10 or new_height < 10:
                continue  # Skip very small templates
            scaled_template = cv2.resize(template, (new_width, new_height))
            # Find matches at this scale
            matches = self.match_template(image, scaled_template, threshold)
            all_matches.extend(matches)
        # Apply NMS across all scales
        all_matches = self._apply_nms(all_matches, overlap_threshold=0.5)
        return all_matches
    def _apply_nms(self, matches: List[TemplateMatch],
                   overlap_threshold: float = 0.3) -> List[TemplateMatch]:
        """Apply Non-Maximum Suppression to remove overlapping matches.
        Args:
            matches: List of template matches
            overlap_threshold: Maximum allowed overlap ratio
        Returns:
            Filtered list of matches (highest-confidence winner per cluster)
        """
        if not matches:
            return matches
        # Sort by confidence (highest first)
        matches = sorted(matches, key=lambda x: x.confidence, reverse=True)
        filtered_matches = []
        for match in matches:
            # Check if this match overlaps significantly with any kept match
            is_duplicate = False
            for kept_match in filtered_matches:
                if self._calculate_overlap(match, kept_match) > overlap_threshold:
                    is_duplicate = True
                    break
            if not is_duplicate:
                filtered_matches.append(match)
        return filtered_matches
    def _calculate_overlap(self, match1: TemplateMatch, match2: TemplateMatch) -> float:
        """Calculate overlap ratio between two matches.
        Args:
            match1: First match
            match2: Second match
        Returns:
            Overlap ratio (0.0 to 1.0) — intersection over union of the bboxes
        """
        x1, y1, w1, h1 = match1.bbox
        x2, y2, w2, h2 = match2.bbox
        # Calculate intersection
        left = max(x1, x2)
        right = min(x1 + w1, x2 + w2)
        top = max(y1, y2)
        bottom = min(y1 + h1, y2 + h2)
        if left >= right or top >= bottom:
            return 0.0
        intersection = (right - left) * (bottom - top)
        area1 = w1 * h1
        area2 = w2 * h2
        union = area1 + area2 - intersection
        return intersection / union if union > 0 else 0.0
class TemplateManager:
    """Manages a collection of templates for game UI detection."""
    def __init__(self, template_dir: Optional[Path] = None):
        """Initialize template manager.
        Args:
            template_dir: Directory containing template images; if it
                exists, all *.png files in it are loaded eagerly.
        """
        self.template_dir = template_dir
        self.templates: Dict[str, TemplateInfo] = {}
        self.matcher = TemplateMatcher()
        if template_dir and template_dir.exists():
            self.load_templates_from_directory(template_dir)
    def load_template(self, name: str, image_path: Path) -> bool:
        """Load single template from file.
        Args:
            name: Template identifier (overwrites any existing entry)
            image_path: Path to template image
        Returns:
            True if loaded successfully
        """
        try:
            image = cv2.imread(str(image_path))
            # cv2.imread returns None (no exception) for unreadable files.
            if image is None:
                logger.error(f"Could not load template image: {image_path}")
                return False
            height, width = image.shape[:2]
            self.templates[name] = TemplateInfo(
                name=name,
                image=image,
                width=width,
                height=height,
                path=str(image_path)
            )
            logger.info(f"Loaded template '{name}' ({width}x{height})")
            return True
        except Exception as e:
            logger.error(f"Failed to load template '{name}': {e}")
            return False
    def load_templates_from_directory(self, directory: Path) -> int:
        """Load all templates from directory.
        Args:
            directory: Directory containing template images (*.png only)
        Returns:
            Number of templates loaded
        """
        loaded_count = 0
        for image_path in directory.glob("*.png"):
            # Template name is the filename without extension.
            template_name = image_path.stem
            if self.load_template(template_name, image_path):
                loaded_count += 1
        logger.info(f"Loaded {loaded_count} templates from {directory}")
        return loaded_count
    def find_template(self, image: np.ndarray, template_name: str,
                      threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Find specific template in image.
        Args:
            image: Source image
            template_name: Name of template to find
            threshold: Confidence threshold override
        Returns:
            List of matches found (empty if template unknown)
        """
        if template_name not in self.templates:
            logger.warning(f"Template '{template_name}' not found")
            return []
        template_info = self.templates[template_name]
        matches = self.matcher.match_template(image, template_info.image, threshold)
        # Set template name in matches (matcher leaves it empty; TemplateMatch
        # is a NamedTuple, so rebuild rather than mutate).
        named_matches = []
        for match in matches:
            named_match = TemplateMatch(
                template_name=template_name,
                confidence=match.confidence,
                center=match.center,
                bbox=match.bbox
            )
            named_matches.append(named_match)
        return named_matches
    def find_any_template(self, image: np.ndarray,
                          template_names: Optional[List[str]] = None,
                          threshold: Optional[float] = None) -> List[TemplateMatch]:
        """Find any of the specified templates in image.
        Args:
            image: Source image
            template_names: List of template names to search for, or None for all
            threshold: Confidence threshold override
        Returns:
            List of all matches found, sorted best-first
        """
        if template_names is None:
            template_names = list(self.templates.keys())
        all_matches = []
        for template_name in template_names:
            matches = self.find_template(image, template_name, threshold)
            all_matches.extend(matches)
        # Sort by confidence
        all_matches.sort(key=lambda x: x.confidence, reverse=True)
        return all_matches
    def wait_for_template(self, capture_func, template_name: str,
                          timeout: float = 10.0, check_interval: float = 0.5,
                          threshold: Optional[float] = None) -> Optional[TemplateMatch]:
        """Wait for template to appear on screen.
        Args:
            capture_func: Function that returns screenshot
            template_name: Template to wait for
            timeout: Maximum wait time in seconds
            check_interval: Time between checks in seconds
            threshold: Confidence threshold override
        Returns:
            First match found, or None if timeout
        """
        # Local import: `time` is not imported at this module's top level.
        import time
        start_time = time.time()
        while time.time() - start_time < timeout:
            image = capture_func()
            matches = self.find_template(image, template_name, threshold)
            if matches:
                return matches[0]  # Return best match
            time.sleep(check_interval)
        return None
    def get_template_info(self, template_name: str) -> Optional[TemplateInfo]:
        """Get information about loaded template.
        Args:
            template_name: Name of template
        Returns:
            TemplateInfo object or None if not found
        """
        return self.templates.get(template_name)
    def list_templates(self) -> List[str]:
        """Get list of all loaded template names.
        Returns:
            List of template names
        """
        return list(self.templates.keys())
    def create_debug_image(self, image: np.ndarray, matches: List[TemplateMatch]) -> np.ndarray:
        """Create debug image showing template matches.
        Args:
            image: Original image (left untouched; a copy is annotated)
            matches: List of matches to highlight
        Returns:
            Debug image with matches drawn
        """
        debug_img = image.copy()
        for match in matches:
            x, y, w, h = match.bbox
            # Draw bounding box
            cv2.rectangle(debug_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Draw center point
            center_x, center_y = match.center
            cv2.circle(debug_img, (center_x, center_y), 5, (255, 0, 0), -1)
            # Draw template name and confidence
            label = f"{match.template_name}: {match.confidence:.2f}"
            cv2.putText(debug_img, label, (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        return debug_img

0
engine/state/__init__.py Normal file
View file

34
engine/state/events.py Normal file
View file

@ -0,0 +1,34 @@
"""Event system for inter-component communication."""
from typing import Callable, Any, Dict, List
import logging
logger = logging.getLogger(__name__)
class EventBus:
    """Minimal publish/subscribe hub for decoupled component messaging."""

    def __init__(self):
        # Maps event name -> ordered list of subscriber callables.
        self._listeners: Dict[str, List[Callable]] = {}

    def on(self, event: str, callback: Callable) -> None:
        """Register *callback* to run whenever *event* is emitted."""
        if event not in self._listeners:
            self._listeners[event] = []
        self._listeners[event].append(callback)

    def off(self, event: str, callback: Callable) -> None:
        """Remove every registration of *callback* for *event*."""
        handlers = self._listeners.get(event)
        if handlers is not None:
            self._listeners[event] = [h for h in handlers if h != callback]

    def emit(self, event: str, **data: Any) -> None:
        """Invoke every subscriber of *event*, isolating handler failures."""
        for handler in self._listeners.get(event, []):
            try:
                handler(**data)
            except Exception as e:
                # One bad handler must not break the others.
                logger.error(f"Event handler error for '{event}': {e}")

    def clear(self) -> None:
        """Remove all listeners."""
        self._listeners.clear()

105
engine/state/manager.py Normal file
View file

@ -0,0 +1,105 @@
"""Game state machine management.
Provides a base state manager that game implementations extend
to detect and track game states (menu, in-game, inventory, etc.).
"""
from typing import Optional, Callable, Dict, Any
from enum import Enum, auto
from dataclasses import dataclass
import logging
import time
import numpy as np
logger = logging.getLogger(__name__)
class BaseGameState(Enum):
    """Base states common to most games.

    Game implementations may extend detection logic around these; values
    are auto-assigned and carry no meaning beyond identity.
    """
    UNKNOWN = auto()  # detection has not run or could not classify the screen
    LOADING = auto()
    MAIN_MENU = auto()
    CHARACTER_SELECT = auto()
    IN_GAME = auto()
    INVENTORY = auto()
    DEAD = auto()
    DISCONNECTED = auto()
@dataclass
class StateTransition:
    """Records a state transition."""
    from_state: BaseGameState  # state being left
    to_state: BaseGameState  # state being entered
    timestamp: float  # time.time() at the moment of transition
    # Optional extra context; remains None (not {}) when absent.
    metadata: Optional[Dict[str, Any]] = None
class GameStateManager:
    """Base class for game state detection and management.

    Game implementations should subclass this and implement
    detect_state() with game-specific screen analysis. The base class
    tracks the current/previous state, a transition history, and
    per-state entry callbacks.
    """

    def __init__(self):
        now = time.time()
        self._current_state: BaseGameState = BaseGameState.UNKNOWN
        self._previous_state: BaseGameState = BaseGameState.UNKNOWN
        self._state_enter_time: float = now
        self._history: list[StateTransition] = []  # ordered transition log
        self._callbacks: Dict[BaseGameState, list[Callable]] = {}

    @property
    def current_state(self) -> BaseGameState:
        return self._current_state

    @property
    def previous_state(self) -> BaseGameState:
        return self._previous_state

    @property
    def time_in_state(self) -> float:
        """Seconds spent in current state."""
        return time.time() - self._state_enter_time

    def detect_state(self, screen: np.ndarray) -> BaseGameState:
        """Detect current game state from screenshot.
        Must be overridden by game implementations.
        """
        raise NotImplementedError("Subclasses must implement detect_state()")

    def update(self, screen: np.ndarray) -> BaseGameState:
        """Update state from current screen. Triggers callbacks on change."""
        detected = self.detect_state(screen)
        if detected == self._current_state:
            return self._current_state
        change = StateTransition(
            from_state=self._current_state,
            to_state=detected,
            timestamp=time.time(),
        )
        self._history.append(change)
        logger.info(f"State: {self._current_state.name}{detected.name}")
        # Commit the transition before notifying listeners so callbacks
        # observe the post-change state.
        self._previous_state = self._current_state
        self._current_state = detected
        self._state_enter_time = time.time()
        for handler in self._callbacks.get(detected, []):
            try:
                handler(change)
            except Exception as e:
                # A failing callback must not abort state tracking.
                logger.error(f"State callback error: {e}")
        return self._current_state

    def on_state(self, state: BaseGameState, callback: Callable) -> None:
        """Register a callback for when entering a state."""
        self._callbacks.setdefault(state, []).append(callback)

    def is_state(self, state: BaseGameState) -> bool:
        return self._current_state == state

View file

87
engine/vision/color.py Normal file
View file

@ -0,0 +1,87 @@
"""Color and pixel analysis utilities.
Provides tools for reading health/mana bars, detecting UI states
via color sampling, and pixel-level game state detection.
"""
from typing import Tuple, Optional, List
import logging
import numpy as np
import cv2
logger = logging.getLogger(__name__)
class ColorAnalyzer:
    """Analyze pixel colors and UI bar states."""

    @staticmethod
    def get_pixel_color(screen: np.ndarray, x: int, y: int) -> Tuple[int, int, int]:
        """Get BGR color at pixel position."""
        b, g, r = screen[y, x].tolist()
        return (b, g, r)

    @staticmethod
    def get_pixel_hsv(screen: np.ndarray, x: int, y: int) -> Tuple[int, int, int]:
        """Get HSV color at pixel position."""
        single_pixel = screen[y:y+1, x:x+1]
        converted = cv2.cvtColor(single_pixel, cv2.COLOR_BGR2HSV)
        return tuple(converted[0, 0].tolist())

    @staticmethod
    def color_matches(
        color: Tuple[int, int, int],
        target: Tuple[int, int, int],
        tolerance: int = 20,
    ) -> bool:
        """Check if a color matches target within tolerance."""
        for actual, expected in zip(color, target):
            if abs(actual - expected) > tolerance:
                return False
        return True

    @staticmethod
    def read_bar_percentage(
        screen: np.ndarray,
        bar_region: Tuple[int, int, int, int],
        filled_color_hsv: Tuple[Tuple[int, int, int], Tuple[int, int, int]],
    ) -> float:
        """Read a horizontal bar's fill percentage (health, mana, xp, etc.).

        Args:
            screen: Screenshot in BGR
            bar_region: (x, y, width, height) of the bar
            filled_color_hsv: (lower_hsv, upper_hsv) range of the filled portion

        Returns:
            Fill percentage 0.0 to 1.0
        """
        rx, ry, rw, rh = bar_region
        strip = cv2.cvtColor(screen[ry:ry+rh, rx:rx+rw], cv2.COLOR_BGR2HSV)
        lower, upper = filled_color_hsv
        mask = cv2.inRange(strip, np.array(lower), np.array(upper))
        # Fraction of "filled" pixels per column, scanned left to right.
        column_fill = np.mean(mask, axis=0) / 255.0
        # The fill level is the rightmost column that is mostly filled.
        filled_columns = np.where(column_fill > 0.3)[0]
        if filled_columns.size == 0:
            return 0.0
        return (filled_columns[-1] + 1) / rw

    @staticmethod
    def sample_region_dominant_color(
        screen: np.ndarray,
        region: Tuple[int, int, int, int],
    ) -> Tuple[int, int, int]:
        """Get the dominant BGR color in a region."""
        rx, ry, rw, rh = region
        patch = screen[ry:ry+rh, rx:rx+rw]
        samples = patch.reshape(-1, 3).astype(np.float32)
        # k-means with k=1 collapses the region to a single dominant center.
        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
        _, _, centers = cv2.kmeans(samples, 1, None, criteria, 3, cv2.KMEANS_RANDOM_CENTERS)
        return tuple(centers[0].astype(int).tolist())

140
engine/vision/detector.py Normal file
View file

@ -0,0 +1,140 @@
"""Object and UI element detection using computer vision.
Provides high-level detection for game elements using template matching,
color filtering, and contour analysis.
"""
from typing import List, Optional, Tuple
from dataclasses import dataclass
import logging
import numpy as np
import cv2
logger = logging.getLogger(__name__)
@dataclass
class Detection:
    """A detected object/element on screen: bounding rect plus match score."""
    x: int              # top-left corner, pixels
    y: int
    width: int
    height: int
    confidence: float   # match/fill score produced by the detector
    label: str = ""     # optional template or category name

    @property
    def center(self) -> Tuple[int, int]:
        """Midpoint of the bounding rectangle as (x, y)."""
        cx = self.x + self.width // 2
        cy = self.y + self.height // 2
        return (cx, cy)

    @property
    def bounds(self) -> Tuple[int, int, int, int]:
        """Rectangle as (left, top, right, bottom)."""
        return (self.x, self.y, self.x + self.width, self.y + self.height)
class ElementDetector:
    """Detects game UI elements and objects via computer vision."""
    def __init__(self, confidence_threshold: float = 0.8):
        # Minimum normalized match score for a detection to count.
        self.confidence_threshold = confidence_threshold
        # Loaded template images keyed by caller-chosen name (BGR arrays).
        self._templates: dict[str, np.ndarray] = {}
    def load_template(self, name: str, image_path: str) -> None:
        """Load a template image for matching.
        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        template = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread returns None (no exception) for unreadable files.
        if template is None:
            raise FileNotFoundError(f"Template not found: {image_path}")
        self._templates[name] = template
        logger.debug(f"Loaded template '{name}': {template.shape}")
    def find_template(
        self, screen: np.ndarray, template_name: str,
        method: int = cv2.TM_CCOEFF_NORMED,
    ) -> Optional[Detection]:
        """Find best match of a template in the screen image.
        Returns None for unknown templates or sub-threshold matches.
        # NOTE(review): assumes a "higher is better" method; TM_SQDIFF*
        # would need min_val/min_loc instead — confirm before switching.
        """
        if template_name not in self._templates:
            logger.error(f"Unknown template: {template_name}")
            return None
        template = self._templates[template_name]
        result = cv2.matchTemplate(screen, template, method)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if max_val >= self.confidence_threshold:
            h, w = template.shape[:2]
            return Detection(
                x=max_loc[0], y=max_loc[1],
                width=w, height=h,
                confidence=max_val, label=template_name,
            )
        return None
    def find_all_templates(
        self, screen: np.ndarray, template_name: str,
        method: int = cv2.TM_CCOEFF_NORMED,
    ) -> List[Detection]:
        """Find all matches of a template above confidence threshold."""
        if template_name not in self._templates:
            return []
        template = self._templates[template_name]
        h, w = template.shape[:2]
        result = cv2.matchTemplate(screen, template, method)
        locations = np.where(result >= self.confidence_threshold)
        detections = []
        # locations is (rows, cols); reversed zip yields (x, y) points.
        for pt in zip(*locations[::-1]):
            detections.append(Detection(
                x=pt[0], y=pt[1], width=w, height=h,
                # result is indexed [row, col] == [y, x].
                confidence=result[pt[1], pt[0]], label=template_name,
            ))
        # Non-maximum suppression (simple distance-based)
        return self._nms(detections, distance_threshold=min(w, h) // 2)
    def find_by_color(
        self, screen: np.ndarray, lower_hsv: Tuple[int, int, int],
        upper_hsv: Tuple[int, int, int], min_area: int = 100,
        label: str = "",
    ) -> List[Detection]:
        """Find objects by HSV color range.
        Confidence here is the contour's fill ratio (contour area divided
        by its bounding-box area), not a template match score.
        """
        hsv = cv2.cvtColor(screen, cv2.COLOR_BGR2HSV)
        mask = cv2.inRange(hsv, np.array(lower_hsv), np.array(upper_hsv))
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        detections = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if area >= min_area:
                x, y, w, h = cv2.boundingRect(contour)
                detections.append(Detection(
                    x=x, y=y, width=w, height=h,
                    confidence=area / (w * h), label=label,
                ))
        return detections
    def _nms(self, detections: List[Detection], distance_threshold: int) -> List[Detection]:
        """Simple non-maximum suppression by distance.
        Keeps the highest-confidence detection and drops any later one
        whose center lies within the threshold on both axes.
        """
        if not detections:
            return []
        detections.sort(key=lambda d: d.confidence, reverse=True)
        kept = []
        for det in detections:
            too_close = False
            for k in kept:
                dx = abs(det.center[0] - k.center[0])
                dy = abs(det.center[1] - k.center[1])
                if dx < distance_threshold and dy < distance_threshold:
                    too_close = True
                    break
            if not too_close:
                kept.append(det)
        return kept