d291dcdc74
agent_api (HTTP server), agent_log (structured logging), agent_events (event bus), agent_console (GameConsole), agent_replay (snapshots), agent_vision (depth/segmentation), agent_fbx (bone remapping), agent_auth (multi-agent), agent_analytics (feature flags + tracking) All modules compile clean with mono. Binary uploaded to S3 v1.0.0. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
374 lines
11 KiB
C++
374 lines
11 KiB
C++
#include "agent_vision.h"
|
|
|
|
#include "core/config/engine.h"
|
|
#include "core/io/image.h"
|
|
#include "core/os/os.h"
|
|
#include "scene/3d/camera_3d.h"
|
|
#include "scene/3d/visual_instance_3d.h"
|
|
#include "scene/main/node.h"
|
|
#include "scene/main/scene_tree.h"
|
|
#include "scene/main/viewport.h"
|
|
#include "scene/main/window.h"
|
|
#include "servers/rendering/rendering_server.h"
|
|
|
|
// Process-wide AgentVision instance pointer. Starts out null; the constructor
// and destructor in this file are the only code here that assign it.
AgentVision *AgentVision::singleton = nullptr;
|
|
|
|
// Registers this object as the process-wide singleton.
//
// Only the first live instance is registered: a later instance (for example
// one created through ClassDB) must not silently steal the pointer from the
// instance the rest of the engine is already using.
AgentVision::AgentVision() {
	if (singleton == nullptr) {
		singleton = this;
	}
}
|
|
|
|
// Unregisters the singleton, but only when this object is the registered one.
//
// Clearing unconditionally would null the pointer out from under a different,
// still-alive instance whenever a secondary AgentVision is destroyed.
AgentVision::~AgentVision() {
	if (singleton == this) {
		singleton = nullptr;
	}
}
|
|
|
|
// Exposes the vision API to ClassDB. Argument names and default values given
// here are what callers going through the binding layer see.
void AgentVision::_bind_methods() {
	// Scene introspection.
	ClassDB::bind_method(
			D_METHOD("get_bounding_boxes"),
			&AgentVision::get_bounding_boxes);

	// Screenshot-style captures; a width of 0 keeps the native size.
	ClassDB::bind_method(
			D_METHOD("get_annotated_screenshot", "width"),
			&AgentVision::get_annotated_screenshot,
			DEFVAL(0));
	ClassDB::bind_method(
			D_METHOD("get_screenshot_diff"),
			&AgentVision::get_screenshot_diff);

	// Derived maps.
	ClassDB::bind_method(
			D_METHOD("get_segmentation_map"),
			&AgentVision::get_segmentation_map);
	ClassDB::bind_method(
			D_METHOD("get_depth_buffer"),
			&AgentVision::get_depth_buffer);
	ClassDB::bind_method(
			D_METHOD("get_minimap", "world_size", "resolution"),
			&AgentVision::get_minimap,
			DEFVAL(100.0f),
			DEFVAL(512));

	// Named-camera capture; the two defaults apply to "width" and "height".
	ClassDB::bind_method(
			D_METHOD("capture_camera", "camera_name", "width", "height"),
			&AgentVision::capture_camera,
			DEFVAL(0),
			DEFVAL(0));
}
|
|
|
|
// Returns screen-space bounding boxes for the VisualInstance3D nodes that are
// visible in the tree and overlap the root viewport.
//
// Each entry is a Dictionary with:
//   "node"     - node name
//   "path"     - node path as a String
//   "class"    - class name of the node
//   "rect"     - Array [min_x, min_y, max_x, max_y] in integer pixels, clamped
//                to the viewport's visible rect
//   "distance" - distance from the active camera to the node's global origin
//
// Returns an empty Array when there is no scene tree, root or active Camera3D.
// Entries are produced in breadth-first scene-tree order.
Array AgentVision::get_bounding_boxes() {
	Array result;

	SceneTree *tree = SceneTree::get_singleton();
	if (!tree || !tree->get_root()) {
		return result;
	}
	Viewport *root = tree->get_root();

	// Find the active camera.
	Camera3D *camera = root->get_camera_3d();
	if (!camera) {
		return result;
	}

	// Viewport size used for the visibility test and for clamping.
	const Vector2 viewport_size = root->get_visible_rect().size;
	const Vector3 camera_pos = camera->get_global_position();

	// Breadth-first traversal of the scene tree.
	List<Node *> pending;
	pending.push_back(root);

	while (!pending.is_empty()) {
		Node *node = pending.front()->get();
		pending.pop_front();

		// Queue the children up front so the early `continue`s below cannot
		// skip them. Queue order, and therefore result order, is unchanged.
		const int child_count = node->get_child_count();
		for (int i = 0; i < child_count; i++) {
			pending.push_back(node->get_child(i));
		}

		VisualInstance3D *visual = Object::cast_to<VisualInstance3D>(node);
		if (!visual || !visual->is_visible_in_tree()) {
			continue;
		}

		const AABB aabb = visual->get_aabb();
		const Transform3D global_xform = visual->get_global_transform();
		const Vector3 lo = aabb.position;
		const Vector3 hi = aabb.position + aabb.size;

		float min_x = 1e10, min_y = 1e10, max_x = -1e10, max_y = -1e10;
		bool any_in_front = false;

		// Project the 8 AABB corners; bit 0/1/2 of i selects max on x/y/z.
		for (int i = 0; i < 8; i++) {
			const Vector3 local_corner(
					(i & 1) ? hi.x : lo.x,
					(i & 2) ? hi.y : lo.y,
					(i & 4) ? hi.z : lo.z);
			const Vector3 corner = global_xform.xform(local_corner);

			// Corners behind the camera have no meaningful projection.
			if (camera->is_position_behind(corner)) {
				continue;
			}

			const Vector2 screen_pos = camera->unproject_position(corner);
			min_x = MIN(min_x, screen_pos.x);
			min_y = MIN(min_y, screen_pos.y);
			max_x = MAX(max_x, screen_pos.x);
			max_y = MAX(max_y, screen_pos.y);
			any_in_front = true;
		}

		if (!any_in_front) {
			continue;
		}

		// Skip objects whose projection lies completely outside the viewport.
		// Clamping them would otherwise produce a degenerate box stuck to a
		// screen edge for something that is not actually on screen.
		if (max_x < 0 || max_y < 0 || min_x > viewport_size.x || min_y > viewport_size.y) {
			continue;
		}

		// Clamp to viewport.
		min_x = CLAMP(min_x, 0, viewport_size.x);
		min_y = CLAMP(min_y, 0, viewport_size.y);
		max_x = CLAMP(max_x, 0, viewport_size.x);
		max_y = CLAMP(max_y, 0, viewport_size.y);

		const float distance = camera_pos.distance_to(global_xform.origin);

		Array rect_arr;
		rect_arr.push_back(static_cast<int>(min_x));
		rect_arr.push_back(static_cast<int>(min_y));
		rect_arr.push_back(static_cast<int>(max_x));
		rect_arr.push_back(static_cast<int>(max_y));

		Dictionary box;
		box["node"] = String(visual->get_name());
		box["path"] = String(visual->get_path());
		box["class"] = visual->get_class();
		box["rect"] = rect_arr;
		box["distance"] = distance;
		result.push_back(box);
	}

	return result;
}
|
|
|
|
// Captures the root viewport and draws a one-pixel green outline around every
// box reported by get_bounding_boxes().
//
// p_width: when > 0, the annotated image is scaled to this width and the
//          height follows the source aspect ratio (never below 1 pixel).
//          When <= 0 the native size is kept.
// Returns the PNG-encoded image, or an empty buffer when the scene tree,
// viewport texture or captured image is unavailable.
Vector<uint8_t> AgentVision::get_annotated_screenshot(int p_width) {
	SceneTree *tree = SceneTree::get_singleton();
	if (!tree || !tree->get_root()) {
		return Vector<uint8_t>();
	}

	// Capture base screenshot. Both the texture and the image are checked
	// before being dereferenced.
	Viewport *viewport = tree->get_root();
	Ref<ViewportTexture> texture = viewport->get_texture();
	if (texture.is_null()) {
		return Vector<uint8_t>();
	}
	Ref<Image> img = texture->get_image();
	if (img.is_null() || img->is_empty()) {
		// An empty image would also make the aspect computation below divide by zero.
		return Vector<uint8_t>();
	}

	const int img_w = img->get_width();
	const int img_h = img->get_height();
	const Color outline(0, 1, 0, 1); // Green boxes.

	Array boxes = get_bounding_boxes();
	for (int i = 0; i < boxes.size(); i++) {
		Dictionary box = boxes[i];
		Array rect = box["rect"];
		if (rect.size() < 4) {
			continue;
		}

		const int x1 = rect[0];
		const int y1 = rect[1];
		const int x2 = rect[2];
		const int y2 = rect[3];

		// Clip the spans once instead of bounds-checking every pixel. Edges
		// whose fixed coordinate lies outside the image are skipped entirely.
		const int x_begin = MAX(x1, 0);
		const int x_end = MIN(x2, img_w - 1);
		const int y_begin = MAX(y1, 0);
		const int y_end = MIN(y2, img_h - 1);

		const bool top_inside = y1 >= 0 && y1 < img_h;
		const bool bottom_inside = y2 >= 0 && y2 < img_h;
		const bool left_inside = x1 >= 0 && x1 < img_w;
		const bool right_inside = x2 >= 0 && x2 < img_w;

		// Horizontal edges.
		for (int x = x_begin; x <= x_end; x++) {
			if (top_inside) {
				img->set_pixel(x, y1, outline);
			}
			if (bottom_inside) {
				img->set_pixel(x, y2, outline);
			}
		}

		// Vertical edges.
		for (int y = y_begin; y <= y_end; y++) {
			if (left_inside) {
				img->set_pixel(x1, y, outline);
			}
			if (right_inside) {
				img->set_pixel(x2, y, outline);
			}
		}
	}

	if (p_width > 0) {
		const float aspect = static_cast<float>(img_h) / static_cast<float>(img_w);
		// Very wide sources could otherwise truncate the derived height to 0.
		const int target_height = MAX(1, static_cast<int>(p_width * aspect));
		img->resize(p_width, target_height, Image::INTERPOLATE_BILINEAR);
	}

	return img->save_png_to_buffer();
}
|
|
|
|
// Captures the root viewport and returns a PNG showing what changed since the
// previously stored screenshot.
//
// Pixels whose mean absolute RGB difference exceeds 0.05 are drawn red; all
// other pixels show the current frame dimmed to 30%. The current frame always
// becomes the new "previous" screenshot.
//
// If there is no previous screenshot, it cannot be decoded, or its size differs
// from the current frame, the plain PNG of the current frame is returned
// instead. Returns an empty buffer when no frame can be captured.
Vector<uint8_t> AgentVision::get_screenshot_diff() {
	SceneTree *tree = SceneTree::get_singleton();
	if (!tree || !tree->get_root()) {
		return Vector<uint8_t>();
	}

	Viewport *viewport = tree->get_root();
	Ref<ViewportTexture> texture = viewport->get_texture();
	if (texture.is_null()) {
		return Vector<uint8_t>();
	}
	Ref<Image> current = texture->get_image();
	if (current.is_null() || current->is_empty()) {
		return Vector<uint8_t>();
	}

	const Vector<uint8_t> current_data = current->save_png_to_buffer();

	// Take the old screenshot and store the new one in a single critical
	// section. Every path below ends with the current frame stored as
	// "previous", so doing it here keeps all access to previous_screenshot
	// under vision_mutex.
	Vector<uint8_t> previous_data;
	{
		MutexLock lock(vision_mutex);
		previous_data = previous_screenshot;
		previous_screenshot = current_data;
	}

	if (previous_data.is_empty()) {
		// No previous frame to compare against.
		return current_data;
	}

	// Decode the previous frame; treat a decode failure like a missing frame.
	Ref<Image> prev;
	prev.instantiate();
	const Error load_err = prev->load_png_from_buffer(previous_data);

	const int width = current->get_width();
	const int height = current->get_height();
	if (load_err != OK || prev->get_width() != width || prev->get_height() != height) {
		return current_data;
	}

	// Create diff image, highlighting changed pixels.
	Ref<Image> diff_img = Image::create_empty(width, height, false, Image::FORMAT_RGB8);
	if (diff_img.is_null()) {
		return current_data;
	}

	for (int y = 0; y < height; y++) {
		for (int x = 0; x < width; x++) {
			const Color c1 = current->get_pixel(x, y);
			const Color c2 = prev->get_pixel(x, y);
			const float diff_val = (Math::abs(c1.r - c2.r) + Math::abs(c1.g - c2.g) + Math::abs(c1.b - c2.b)) / 3.0f;

			if (diff_val > 0.05f) {
				// Changed pixel. The target format is RGB8, which has no alpha
				// channel, so the pixel is solid red whatever the magnitude.
				diff_img->set_pixel(x, y, Color(1, 0, 0));
			} else {
				// Unchanged: show dimmed current.
				diff_img->set_pixel(x, y, Color(c1.r * 0.3f, c1.g * 0.3f, c1.b * 0.3f));
			}
		}
	}

	return diff_img->save_png_to_buffer();
}
|
|
|
|
// Builds an approximate segmentation map: a black image the size of the root
// viewport capture, with each bounding box from get_bounding_boxes() filled
// with a solid color.
//
// True segmentation requires a custom rendering pass; this is a bounding-box
// stand-in. Boxes are painted in the order get_bounding_boxes() returns them,
// so where boxes overlap the later one wins.
//
// Returns the PNG-encoded map, or an empty buffer when no frame is available.
Vector<uint8_t> AgentVision::get_segmentation_map() {
	SceneTree *tree = SceneTree::get_singleton();
	if (!tree || !tree->get_root()) {
		return Vector<uint8_t>();
	}

	// The capture is only used to obtain the output dimensions.
	Viewport *viewport = tree->get_root();
	Ref<ViewportTexture> texture = viewport->get_texture();
	if (texture.is_null()) {
		return Vector<uint8_t>();
	}
	Ref<Image> img = texture->get_image();
	if (img.is_null() || img->is_empty()) {
		return Vector<uint8_t>();
	}

	// Create a black image same size.
	const int width = img->get_width();
	const int height = img->get_height();
	Ref<Image> seg_img = Image::create_empty(width, height, false, Image::FORMAT_RGB8);
	if (seg_img.is_null()) {
		return Vector<uint8_t>();
	}

	Array boxes = get_bounding_boxes();
	for (int i = 0; i < boxes.size(); i++) {
		Dictionary box = boxes[i];
		Array rect = box["rect"];
		if (rect.size() < 4) {
			continue;
		}

		// Key the color on the node's path rather than on its position in the
		// list. A list index changes whenever another object enters or leaves
		// view, which would make a node's color flicker between frames.
		const String node_path = box["path"];
		const Color color = _get_segmentation_color(node_path.hash64());

		const int x1 = CLAMP((int)rect[0], 0, width - 1);
		const int y1 = CLAMP((int)rect[1], 0, height - 1);
		const int x2 = CLAMP((int)rect[2], 0, width - 1);
		const int y2 = CLAMP((int)rect[3], 0, height - 1);

		for (int y = y1; y <= y2; y++) {
			for (int x = x1; x <= x2; x++) {
				seg_img->set_pixel(x, y, color);
			}
		}
	}

	return seg_img->save_png_to_buffer();
}
|
|
|
|
// Returns a grayscale PNG of the root viewport as a stand-in for a depth map.
//
// NOTE: this is NOT real depth. Native depth-buffer access needs
// renderer-specific code, so for now each pixel is the luminance
// (0.299 R + 0.587 G + 0.114 B) of the rendered color image.
//
// Returns an empty buffer when no frame is available or the output image
// cannot be allocated.
Vector<uint8_t> AgentVision::get_depth_buffer() {
	SceneTree *tree = SceneTree::get_singleton();
	if (!tree || !tree->get_root()) {
		return Vector<uint8_t>();
	}

	Viewport *viewport = tree->get_root();
	Ref<ViewportTexture> texture = viewport->get_texture();
	if (texture.is_null()) {
		return Vector<uint8_t>();
	}
	Ref<Image> img = texture->get_image();
	if (img.is_null() || img->is_empty()) {
		return Vector<uint8_t>();
	}

	const int width = img->get_width();
	const int height = img->get_height();

	// Single-channel 8-bit output.
	Ref<Image> depth = Image::create_empty(width, height, false, Image::FORMAT_L8);
	if (depth.is_null()) {
		return Vector<uint8_t>();
	}

	for (int y = 0; y < height; y++) {
		for (int x = 0; x < width; x++) {
			const Color c = img->get_pixel(x, y);
			const float luminance = c.r * 0.299f + c.g * 0.587f + c.b * 0.114f;
			depth->set_pixel(x, y, Color(luminance, luminance, luminance));
		}
	}

	return depth->save_png_to_buffer();
}
|
|
|
|
// Placeholder for a top-down minimap capture.
//
// Not implemented: both arguments are currently ignored and the result is
// always an empty buffer. A real implementation would render through a
// SubViewport with an orthographic camera looking down; until then the game is
// expected to set up such a SubViewport itself.
Vector<uint8_t> AgentVision::get_minimap(float p_world_size, int p_resolution) {
	Vector<uint8_t> no_image;
	return no_image;
}
|
|
|
|
// Captures the viewport that contains the node named p_camera_name and returns
// it as a PNG.
//
// p_camera_name: name passed to find_child() on the root (recursive, not
//                restricted to owned nodes).
// p_width, p_height: optional output size. If only one is > 0, the other is
//                derived from the source aspect ratio; if both are <= 0 the
//                native size is kept. Output dimensions are never below 1.
//
// NOTE(review): the node is not checked to be a camera, and nothing here makes
// it the current camera. The capture is simply whatever its viewport is
// currently showing.
//
// Returns an empty buffer when the node, its viewport or a frame is unavailable.
Vector<uint8_t> AgentVision::capture_camera(const String &p_camera_name, int p_width, int p_height) {
	SceneTree *tree = SceneTree::get_singleton();
	if (!tree || !tree->get_root()) {
		return Vector<uint8_t>();
	}

	// Find the named camera.
	Node *camera_node = tree->get_root()->find_child(p_camera_name, true, false);
	if (!camera_node) {
		return Vector<uint8_t>();
	}

	// Capture from whichever viewport the node lives in: a SubViewport
	// ancestor if there is one, otherwise the main viewport.
	Viewport *viewport = camera_node->get_viewport();
	if (!viewport) {
		return Vector<uint8_t>();
	}

	Ref<ViewportTexture> texture = viewport->get_texture();
	if (texture.is_null()) {
		return Vector<uint8_t>();
	}
	Ref<Image> img = texture->get_image();
	if (img.is_null() || img->is_empty()) {
		// An empty image would also make the aspect math below divide by zero.
		return Vector<uint8_t>();
	}

	if (p_width > 0 || p_height > 0) {
		const float src_w = static_cast<float>(img->get_width());
		const float src_h = static_cast<float>(img->get_height());

		int target_w = p_width;
		int target_h = p_height;
		if (target_w <= 0) {
			// Height-only request: derive the width.
			target_w = static_cast<int>(target_h * (src_w / src_h));
		} else if (target_h <= 0) {
			// Width-only request: derive the height.
			target_h = static_cast<int>(target_w * (src_h / src_w));
		}

		// Extreme aspect ratios can truncate a derived dimension to 0.
		img->resize(MAX(1, target_w), MAX(1, target_h), Image::INTERPOLATE_BILINEAR);
	}

	return img->save_png_to_buffer();
}
|
|
|
|
// Replaces the stored "previous" screenshot with p_data.
// The assignment is made while holding vision_mutex.
void AgentVision::store_previous_screenshot(const Vector<uint8_t> &p_data) {
	{
		MutexLock guard(vision_mutex);
		previous_screenshot = p_data;
	}
}
|
|
|
|
// Returns the segmentation color for p_instance_id, allocating a new one on
// first use and caching it in segmentation_colors.
//
// Colors come from a running counter (next_color_idx), not from the id, so the
// mapping depends on the order in which ids are first seen. The palette
// repeats: each channel is (index * k) mod 255, so indices 255 apart produce
// the same color.
Color AgentVision::_get_segmentation_color(uint64_t p_instance_id) {
	if (segmentation_colors.has(p_instance_id)) {
		return segmentation_colors[p_instance_id];
	}

	// An index that is a multiple of 255 (including 0) makes all three
	// channels 0, i.e. pure black, which is the background of the segmentation
	// map. Skip those indices so an object never blends into the background.
	if (next_color_idx % 255 == 0) {
		next_color_idx++;
	}

	const uint32_t idx = next_color_idx++;
	const float r = ((idx * 67) % 255) / 255.0f;
	const float g = ((idx * 131) % 255) / 255.0f;
	const float b = ((idx * 199) % 255) / 255.0f;
	const Color color(r, g, b);

	segmentation_colors[p_instance_id] = color;
	return color;
}
|