#include "agent_vision.h"

#include "core/config/engine.h"
#include "core/io/image.h"
#include "core/os/os.h"
#include "scene/3d/camera_3d.h"
#include "scene/3d/visual_instance_3d.h"
#include "scene/main/node.h"
#include "scene/main/scene_tree.h"
#include "scene/main/viewport.h"
#include "scene/main/window.h"
#include "servers/rendering/rendering_server.h"

AgentVision *AgentVision::singleton = nullptr;

// Returns the root window of the active scene tree, or nullptr when there is none.
static Window *vision_get_root_window() {
	SceneTree *tree = SceneTree::get_singleton();
	return tree ? tree->get_root() : nullptr;
}

// Reads the viewport's texture back as an image.
// Returns a null Ref when the viewport, its texture, or the image is missing,
// or when the image has no pixels (so callers may safely divide by its width).
static Ref<Image> vision_capture_image(Viewport *p_viewport) {
	if (!p_viewport) {
		return Ref<Image>();
	}
	Ref<ViewportTexture> texture = p_viewport->get_texture();
	if (texture.is_null()) {
		return Ref<Image>();
	}
	Ref<Image> img = texture->get_image();
	if (img.is_null() || img->is_empty()) {
		return Ref<Image>();
	}
	return img;
}

// Resizes p_img to p_width pixels wide. When p_height is not positive the height is
// derived from the current aspect ratio. The height is never allowed to drop below
// one pixel, since very wide images with a small target width would otherwise
// truncate to zero.
static void vision_resize_to_width(const Ref<Image> &p_img, int p_width, int p_height) {
	int height = p_height;
	if (height <= 0) {
		const float aspect = (float)p_img->get_height() / (float)p_img->get_width();
		height = (int)(p_width * aspect);
	}
	p_img->resize(p_width, MAX(height, 1), Image::INTERPOLATE_BILINEAR);
}

// Draws a one pixel wide rectangle outline. Pixels outside the image are skipped,
// and an edge whose fixed coordinate lies outside the image is not drawn at all.
static void vision_draw_rect_outline(const Ref<Image> &p_img, int p_x1, int p_y1, int p_x2, int p_y2, const Color &p_color) {
	const int width = p_img->get_width();
	const int height = p_img->get_height();

	const bool top_inside = p_y1 >= 0 && p_y1 < height;
	const bool bottom_inside = p_y2 >= 0 && p_y2 < height;
	const bool left_inside = p_x1 >= 0 && p_x1 < width;
	const bool right_inside = p_x2 >= 0 && p_x2 < width;

	// Horizontal edges.
	const int x_end = MIN(p_x2, width - 1);
	for (int x = MAX(p_x1, 0); x <= x_end; x++) {
		if (top_inside) {
			p_img->set_pixel(x, p_y1, p_color);
		}
		if (bottom_inside) {
			p_img->set_pixel(x, p_y2, p_color);
		}
	}

	// Vertical edges.
	const int y_end = MIN(p_y2, height - 1);
	for (int y = MAX(p_y1, 0); y <= y_end; y++) {
		if (left_inside) {
			p_img->set_pixel(p_x1, y, p_color);
		}
		if (right_inside) {
			p_img->set_pixel(p_x2, y, p_color);
		}
	}
}

AgentVision::AgentVision() {
	singleton = this;
}

AgentVision::~AgentVision() {
	singleton = nullptr;
}

void AgentVision::_bind_methods() {
	ClassDB::bind_method(D_METHOD("get_bounding_boxes"), &AgentVision::get_bounding_boxes);
	ClassDB::bind_method(D_METHOD("get_annotated_screenshot", "width"), &AgentVision::get_annotated_screenshot, DEFVAL(0));
	ClassDB::bind_method(D_METHOD("get_screenshot_diff"), &AgentVision::get_screenshot_diff);
	ClassDB::bind_method(D_METHOD("get_segmentation_map"), &AgentVision::get_segmentation_map);
	ClassDB::bind_method(D_METHOD("get_depth_buffer"), &AgentVision::get_depth_buffer);
	ClassDB::bind_method(D_METHOD("get_minimap", "world_size", "resolution"), &AgentVision::get_minimap, DEFVAL(100.0f), DEFVAL(512));
	ClassDB::bind_method(D_METHOD("capture_camera", "camera_name", "width", "height"), &AgentVision::capture_camera, DEFVAL(0), DEFVAL(0));
}

// Returns one Dictionary per visible VisualInstance3D whose projected AABB overlaps
// the root viewport. Each entry holds:
//   "node"     - node name
//   "path"     - node path
//   "class"    - class name
//   "rect"     - [min_x, min_y, max_x, max_y] in viewport coordinates, clamped to the viewport
//   "distance" - distance from the camera to the node's global origin
// Returns an empty Array when there is no scene tree, root window, or active 3D camera.
Array AgentVision::get_bounding_boxes() {
	Array result;

	Window *root = vision_get_root_window();
	if (!root) {
		return result;
	}

	// Find the active camera.
	Camera3D *camera = root->get_camera_3d();
	if (!camera) {
		return result;
	}

	// Viewport size used for clamping the projected rectangles.
	const Vector2 viewport_size = root->get_visible_rect().size;
	const Vector3 camera_position = camera->get_global_position();

	// Breadth-first traversal of the scene tree looking for VisualInstance3D nodes.
	List<Node *> pending;
	pending.push_back(root);

	while (!pending.is_empty()) {
		Node *node = pending.front()->get();
		pending.pop_front();

		for (int i = 0; i < node->get_child_count(); i++) {
			pending.push_back(node->get_child(i));
		}

		VisualInstance3D *visual = Object::cast_to<VisualInstance3D>(node);
		if (!visual || !visual->is_visible_in_tree()) {
			continue;
		}

		const AABB aabb = visual->get_aabb();
		const Transform3D global_xform = visual->get_global_transform();
		const Vector3 aabb_min = aabb.position;
		const Vector3 aabb_max = aabb.position + aabb.size;

		float min_x = 1e10, min_y = 1e10, max_x = -1e10, max_y = -1e10;
		bool any_visible = false;

		// Project the eight AABB corners to screen space. Bits 0/1/2 of the corner
		// index select the max (set) or min (clear) value on the x/y/z axis.
		for (int i = 0; i < 8; i++) {
			const Vector3 local_corner(
					(i & 1) ? aabb_max.x : aabb_min.x,
					(i & 2) ? aabb_max.y : aabb_min.y,
					(i & 4) ? aabb_max.z : aabb_min.z);
			const Vector3 world_corner = global_xform.xform(local_corner);

			// Corners behind the camera have no meaningful projection.
			if (camera->is_position_behind(world_corner)) {
				continue;
			}

			const Vector2 screen_pos = camera->unproject_position(world_corner);
			min_x = MIN(min_x, screen_pos.x);
			min_y = MIN(min_y, screen_pos.y);
			max_x = MAX(max_x, screen_pos.x);
			max_y = MAX(max_y, screen_pos.y);
			any_visible = true;
		}

		if (!any_visible) {
			continue;
		}

		// Skip objects whose projection lies entirely outside the viewport; clamping
		// them would otherwise produce a degenerate rectangle stuck to a viewport edge.
		if (max_x < 0 || max_y < 0 || min_x > viewport_size.x || min_y > viewport_size.y) {
			continue;
		}

		// Clamp to viewport.
		min_x = CLAMP(min_x, 0, viewport_size.x);
		min_y = CLAMP(min_y, 0, viewport_size.y);
		max_x = CLAMP(max_x, 0, viewport_size.x);
		max_y = CLAMP(max_y, 0, viewport_size.y);

		Array rect_arr;
		rect_arr.push_back((int)min_x);
		rect_arr.push_back((int)min_y);
		rect_arr.push_back((int)max_x);
		rect_arr.push_back((int)max_y);

		Dictionary box;
		box["node"] = String(visual->get_name());
		box["path"] = String(visual->get_path());
		box["class"] = visual->get_class();
		box["rect"] = rect_arr;
		box["distance"] = camera_position.distance_to(global_xform.origin);
		result.push_back(box);
	}

	return result;
}

// Captures the root viewport, draws the outline of every bounding box from
// get_bounding_boxes() in green, and returns the result as PNG data.
// When p_width is positive the image is scaled to that width, keeping the aspect ratio.
// Returns an empty buffer when the viewport cannot be captured.
Vector<uint8_t> AgentVision::get_annotated_screenshot(int p_width) {
	Ref<Image> img = vision_capture_image(vision_get_root_window());
	if (img.is_null()) {
		return Vector<uint8_t>();
	}

	const Color outline_color(0, 1, 0, 1); // Green boxes.

	Array boxes = get_bounding_boxes();
	for (int i = 0; i < boxes.size(); i++) {
		Dictionary box = boxes[i];
		Array rect = box["rect"];
		if (rect.size() < 4) {
			continue;
		}
		vision_draw_rect_outline(img, rect[0], rect[1], rect[2], rect[3], outline_color);
	}

	if (p_width > 0) {
		vision_resize_to_width(img, p_width, 0);
	}

	return img->save_png_to_buffer();
}

// Captures the root viewport and compares it against the screenshot stored by the
// previous call (or by store_previous_screenshot()). Returns PNG data in which
// changed pixels are red and unchanged pixels are a dimmed copy of the current frame.
// When there is no usable previous screenshot (none stored, undecodable, or of a
// different size) the current screenshot is returned unmodified. In every case the
// current screenshot becomes the new stored baseline.
Vector<uint8_t> AgentVision::get_screenshot_diff() {
	Ref<Image> current = vision_capture_image(vision_get_root_window());
	if (current.is_null()) {
		return Vector<uint8_t>();
	}

	const Vector<uint8_t> current_data = current->save_png_to_buffer();

	// Fetch the old baseline and install the new one in a single critical section, so
	// the shared buffer is never read while another thread is replacing it.
	Vector<uint8_t> previous_data;
	{
		MutexLock lock(vision_mutex);
		previous_data = previous_screenshot;
		previous_screenshot = current_data;
	}

	if (previous_data.is_empty()) {
		// No previous screenshot to compare against.
		return current_data;
	}

	const int width = current->get_width();
	const int height = current->get_height();

	// Decode the previous image; a decode failure or size mismatch makes a per-pixel
	// comparison meaningless.
	Ref<Image> prev;
	prev.instantiate();
	if (prev->load_png_from_buffer(previous_data) != OK || prev->get_width() != width || prev->get_height() != height) {
		return current_data;
	}

	// Create diff image, highlighting changed pixels.
	Ref<Image> diff_img = Image::create_empty(width, height, false, Image::FORMAT_RGB8);
	for (int y = 0; y < height; y++) {
		for (int x = 0; x < width; x++) {
			const Color c1 = current->get_pixel(x, y);
			const Color c2 = prev->get_pixel(x, y);
			// Mean absolute difference over the three color channels.
			const float diff_val = (Math::abs(c1.r - c2.r) + Math::abs(c1.g - c2.g) + Math::abs(c1.b - c2.b)) / 3.0f;
			if (diff_val > 0.05f) {
				// Changed pixel: show in red.
				diff_img->set_pixel(x, y, Color(1, 0, 0, diff_val));
			} else {
				// Unchanged: show dimmed current.
				diff_img->set_pixel(x, y, Color(c1.r * 0.3f, c1.g * 0.3f, c1.b * 0.3f));
			}
		}
	}

	return diff_img->save_png_to_buffer();
}

// Returns PNG data of a black image, the size of the root viewport capture, in which
// the bounding box of each entry from get_bounding_boxes() is filled with a color
// chosen by its position in that list. Later boxes overwrite earlier ones where they
// overlap. Returns an empty buffer when the viewport cannot be captured.
Vector<uint8_t> AgentVision::get_segmentation_map() {
	// Segmentation requires a custom rendering pass.
	// For now, generate a colored bounding box map.
	Ref<Image> img = vision_capture_image(vision_get_root_window());
	if (img.is_null()) {
		return Vector<uint8_t>();
	}

	const int width = img->get_width();
	const int height = img->get_height();

	// Create a black image of the same size.
	Ref<Image> seg_img = Image::create_empty(width, height, false, Image::FORMAT_RGB8);

	Array boxes = get_bounding_boxes();
	for (int i = 0; i < boxes.size(); i++) {
		Dictionary box = boxes[i];
		Array rect = box["rect"];
		if (rect.size() < 4) {
			continue;
		}

		// Colors are keyed by the 1-based list index, not by node identity.
		const Color color = _get_segmentation_color(i + 1);

		const int x1 = CLAMP((int)rect[0], 0, width - 1);
		const int y1 = CLAMP((int)rect[1], 0, height - 1);
		const int x2 = CLAMP((int)rect[2], 0, width - 1);
		const int y2 = CLAMP((int)rect[3], 0, height - 1);

		for (int y = y1; y <= y2; y++) {
			for (int x = x1; x <= x2; x++) {
				seg_img->set_pixel(x, y, color);
			}
		}
	}

	return seg_img->save_png_to_buffer();
}

// Returns PNG data of a single-channel image approximating depth.
// NOTE: this is NOT real depth. It is the luminance of the captured root viewport.
// Returns an empty buffer when the viewport cannot be captured.
Vector<uint8_t> AgentVision::get_depth_buffer() {
	// Native depth buffer access requires renderer-specific code.
	// For now, capture viewport and convert to grayscale as approximation.
	Ref<Image> img = vision_capture_image(vision_get_root_window());
	if (img.is_null()) {
		return Vector<uint8_t>();
	}

	const int width = img->get_width();
	const int height = img->get_height();

	Ref<Image> depth = Image::create_empty(width, height, false, Image::FORMAT_L8);
	for (int y = 0; y < height; y++) {
		for (int x = 0; x < width; x++) {
			const Color c = img->get_pixel(x, y);
			const float luminance = c.r * 0.299f + c.g * 0.587f + c.b * 0.114f;
			depth->set_pixel(x, y, Color(luminance, luminance, luminance));
		}
	}

	return depth->save_png_to_buffer();
}

// Placeholder: always returns an empty buffer; both parameters are currently unused.
Vector<uint8_t> AgentVision::get_minimap(float p_world_size, int p_resolution) {
	// A proper minimap would use a SubViewport with an orthographic camera looking down.
	// For now, return empty and let the game set up a SubViewport.
	return Vector<uint8_t>();
}

// Captures the viewport that contains the node named p_camera_name and returns it as PNG data.
// The node is searched recursively below the root window; the first match by name is used.
// When p_width is positive the image is resized to p_width x p_height, with the height
// derived from the aspect ratio if p_height is not positive. p_height is ignored when
// p_width is not positive. Returns an empty buffer when the node or its viewport
// cannot be found or captured.
Vector<uint8_t> AgentVision::capture_camera(const String &p_camera_name, int p_width, int p_height) {
	Window *root = vision_get_root_window();
	if (!root) {
		return Vector<uint8_t>();
	}

	// Find the named camera.
	Node *camera_node = root->find_child(p_camera_name, true, false);
	if (!camera_node) {
		return Vector<uint8_t>();
	}

	// If this camera has a SubViewport parent, capture from it.
	// Otherwise, use the main viewport (camera must be current).
	Ref<Image> img = vision_capture_image(camera_node->get_viewport());
	if (img.is_null()) {
		return Vector<uint8_t>();
	}

	if (p_width > 0) {
		vision_resize_to_width(img, p_width, p_height);
	}

	return img->save_png_to_buffer();
}

// Replaces the baseline screenshot used by get_screenshot_diff().
void AgentVision::store_previous_screenshot(const Vector<uint8_t> &p_data) {
	MutexLock lock(vision_mutex);
	previous_screenshot = p_data;
}

// Returns the color associated with p_instance_id, assigning the next color in the
// sequence the first time an id is seen. Assigned colors are cached, so the same id
// always maps to the same color for the lifetime of this object.
Color AgentVision::_get_segmentation_color(uint64_t p_instance_id) {
	if (segmentation_colors.has(p_instance_id)) {
		return segmentation_colors[p_instance_id];
	}

	// Derive the color from a running counter; the multipliers spread consecutive
	// indices across each channel.
	const uint32_t idx = next_color_idx++;
	const float r = ((idx * 67) % 255) / 255.0f;
	const float g = ((idx * 131) % 255) / 255.0f;
	const float b = ((idx * 199) % 255) / 255.0f;

	const Color color(r, g, b);
	segmentation_colors[p_instance_id] = color;
	return color;
}