From d274163314e52750fd987a80c90b683ee461541e Mon Sep 17 00:00:00 2001
From: Jonathan Thomas
Date: Sun, 10 May 2026 14:34:38 -0500
Subject: [PATCH 01/14] Improve tracked object rendering controls

- Remove unused tracked-object rotation property from JSON, properties, and box values
- Add per-object Draw Text support alongside Draw Box
- Use deterministic class colors for Object Detector instead of random seeded colors
- Scale tracked-object stroke widths based on preview raster size
- Keep Object Detector text rendering gated by both global and per-object text settings
---
 src/TrackedObjectBBox.cpp       | 31 +++++++++++------------
 src/TrackedObjectBBox.h         |  5 ++--
 src/TrackedObjectBase.cpp       |  2 +-
 src/TrackedObjectBase.h         |  5 +++-
 src/effects/ObjectDetection.cpp | 44 ++++++++++++++++++++++++++------
 src/effects/Tracker.cpp         |  1 -
 tests/KeyFrame.cpp              |  4 +--
 7 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/src/TrackedObjectBBox.cpp b/src/TrackedObjectBBox.cpp
index ddcabcfc0..db506079b 100644
--- a/src/TrackedObjectBBox.cpp
+++ b/src/TrackedObjectBBox.cpp
@@ -35,7 +35,7 @@ TrackedObjectBBox::TrackedObjectBBox()
 // displacement as 0 and the scales as 1 for the first frame
 TrackedObjectBBox::TrackedObjectBBox(int Red, int Green, int Blue, int Alfa)
 : delta_x(0.0), delta_y(0.0),
- scale_x(1.0), scale_y(1.0), rotation(0.0),
+ scale_x(1.0), scale_y(1.0),
 background_alpha(0.0), background_corner(12),
 stroke_width(2) , stroke_alpha(0.7),
 stroke(Red, Green, Blue, Alfa),
@@ -153,8 +153,6 @@ BBox TrackedObjectBBox::GetBox(int64_t frame_number)
 currentBBox.cy += this->delta_y.GetValue(frame_number);
 currentBBox.width *= this->scale_x.GetValue(frame_number);
 currentBBox.height *= this->scale_y.GetValue(frame_number);
- currentBBox.angle += this->rotation.GetValue(frame_number);
-
 return currentBBox;
 }
@@ -172,8 +170,6 @@ BBox TrackedObjectBBox::GetBox(int64_t frame_number)
 interpolatedBBox.cy += this->delta_y.GetValue(frame_number);
 interpolatedBBox.width *= this->scale_x.GetValue(frame_number);
 interpolatedBBox.height *= this->scale_y.GetValue(frame_number);
- interpolatedBBox.angle += this->rotation.GetValue(frame_number);
-
 return interpolatedBBox;
 }
@@ -212,9 +208,10 @@ double TrackedObjectBBox::ScaledStrokeWidth(int64_t frame_number, int image_widt
 if (output_size.width() <= 0 || output_size.height() <= 0)
 return base_width;
- const double raster_scale_x = static_cast<double>(image_width) / output_size.width();
- const double raster_scale_y = static_cast<double>(image_height) / output_size.height();
- return base_width * std::sqrt(raster_scale_x * raster_scale_y);
+ const double raster_scale_x = static_cast<double>(output_size.width()) / image_width;
+ const double raster_scale_y = static_cast<double>(output_size.height()) / image_height;
+ const double raster_scale = std::sqrt(raster_scale_x * raster_scale_y);
+ return base_width * std::max(raster_scale, 1.0 / std::max(raster_scale, 0.000001));
 }

 // Interpolate the bounding-box properties
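For intuition, here is the new scaling rule worked through by hand (a standalone sketch, not part of the patch; the function name is illustrative). Because the factor is max(s, 1/s), it is always >= 1, so borders are never drawn thinner than their base width, no matter which way the raster and output sizes diverge:

// Standalone numeric check of the stroke-width scaling rule above.
#include <algorithm>
#include <cmath>
#include <cstdio>

double scaled_stroke_width(double base_width,
                           int image_width, int image_height,
                           int output_width, int output_height)
{
    const double sx = static_cast<double>(output_width) / image_width;
    const double sy = static_cast<double>(output_height) / image_height;
    const double s = std::sqrt(sx * sy);
    return base_width * std::max(s, 1.0 / std::max(s, 0.000001));
}

int main()
{
    // 1920x1080 project output drawn onto a 960x540 preview raster:
    // s = 2.0, so a 2 px base stroke becomes 4 px and stays visible.
    std::printf("%f\n", scaled_stroke_width(2.0, 960, 540, 1920, 1080));
    // Same-size raster: s = 1.0, width is unchanged.
    std::printf("%f\n", scaled_stroke_width(2.0, 1920, 1080, 1920, 1080));
}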
@@ -240,7 +237,7 @@ BBox TrackedObjectBBox::InterpolateBoxes(double t1, double t2, BBox left, BBox r
 Point height_right(t2, right.height, openshot::InterpolationType::LINEAR);
 Point height = InterpolateBetween(height_left, height_right, target, 0.01);
- // Interpolate the rotation angle
+ // Interpolate the source bounding-box angle
 Point angle_left(t1, left.angle, openshot::InterpolationType::LINEAR);
 Point angle_right(t2, right.angle, openshot::InterpolationType::LINEAR);
 Point angle = InterpolateBetween(angle_left, angle_right, target, 0.01);
@@ -363,9 +360,9 @@ Json::Value TrackedObjectBBox::JsonValue() const
 root["delta_y"] = delta_y.JsonValue();
 root["scale_x"] = scale_x.JsonValue();
 root["scale_y"] = scale_y.JsonValue();
- root["rotation"] = rotation.JsonValue();
 root["visible"] = visible.JsonValue();
 root["draw_box"] = draw_box.JsonValue();
+ root["draw_text"] = draw_text.JsonValue();
 root["stroke"] = stroke.JsonValue();
 root["background_alpha"] = background_alpha.JsonValue();
 root["background_corner"] = background_corner.JsonValue();
@@ -430,12 +427,12 @@ void TrackedObjectBBox::SetJsonValue(const Json::Value root)
 scale_x.SetJsonValue(root["scale_x"]);
 if (!root["scale_y"].isNull())
 scale_y.SetJsonValue(root["scale_y"]);
- if (!root["rotation"].isNull())
- rotation.SetJsonValue(root["rotation"]);
 if (!root["visible"].isNull())
 visible.SetJsonValue(root["visible"]);
 if (!root["draw_box"].isNull())
 draw_box.SetJsonValue(root["draw_box"]);
+ if (!root["draw_text"].isNull())
+ draw_text.SetJsonValue(root["draw_text"]);
 if (!root["stroke"].isNull())
 stroke.SetJsonValue(root["stroke"]);
 if (!root["background_alpha"].isNull())
@@ -473,13 +470,16 @@ Json::Value TrackedObjectBBox::PropertiesJSON(int64_t requested_frame) const
 root["delta_y"] = add_property_json("Displacement Y-axis", delta_y.GetValue(requested_frame), "float", "", &delta_y, -1.0, 1.0, false, requested_frame);
 root["scale_x"] = add_property_json("Scale (Width)", scale_x.GetValue(requested_frame), "float", "", &scale_x, 0.0, 1.0, false, requested_frame);
 root["scale_y"] = add_property_json("Scale (Height)", scale_y.GetValue(requested_frame), "float", "", &scale_y, 0.0, 1.0, false, requested_frame);
- root["rotation"] = add_property_json("Rotation", rotation.GetValue(requested_frame), "float", "", &rotation, 0, 360, false, requested_frame);
 root["visible"] = add_property_json("Visible", visible.GetValue(requested_frame), "int", "", &visible, 0, 1, true, requested_frame);
 root["draw_box"] = add_property_json("Draw Box", draw_box.GetValue(requested_frame), "int", "", &draw_box, 0, 1, false, requested_frame);
- root["draw_box"]["choices"].append(add_property_choice_json("Yes", true, draw_box.GetValue(requested_frame)));
+ root["draw_box"]["choices"].append(add_property_choice_json("Yes", true, draw_box.GetValue(requested_frame)));
 root["draw_box"]["choices"].append(add_property_choice_json("No", false, draw_box.GetValue(requested_frame)));
+ root["draw_text"] = add_property_json("Draw Text", draw_text.GetValue(requested_frame), "int", "", &draw_text, 0, 1, false, requested_frame);
+ root["draw_text"]["choices"].append(add_property_choice_json("Yes", true, draw_text.GetValue(requested_frame)));
+ root["draw_text"]["choices"].append(add_property_choice_json("No", false, draw_text.GetValue(requested_frame)));
+
 root["stroke"] = add_property_json("Border", 0.0, "color", "", NULL, 0, 255, false, requested_frame);
 root["stroke"]["red"] = add_property_json("Red", stroke.red.GetValue(requested_frame), "float", "", &stroke.red, 0, 255, false, requested_frame);
 root["stroke"]["blue"] = add_property_json("Blue", stroke.blue.GetValue(requested_frame), "float", "", &stroke.blue, 0, 255, false, requested_frame);
@@ -558,8 +558,5 @@ std::map<std::string, float> TrackedObjectBBox::GetBoxValues(int64_t frame_numbe
 boxValues["sy"] = this->scale_y.GetValue(frame_number);
 boxValues["dx"] = this->delta_x.GetValue(frame_number);
 boxValues["dy"] = this->delta_y.GetValue(frame_number);
- boxValues["r"] = this->rotation.GetValue(frame_number);
-
 return boxValues;
 }
diff --git a/src/TrackedObjectBBox.h b/src/TrackedObjectBBox.h
index 9f4d13ddf..b250c15a2 100644
--- a/src/TrackedObjectBBox.h
+++ b/src/TrackedObjectBBox.h
@@ -32,7 +32,7 @@ namespace openshot
 *
 * The bounding-box structure holds five floating-point properties:
 * the x and y coordinates of the rectangle's center point (cx, cy),
- * the rectangle's width, height and rotation.
+ * the rectangle's width, height and source rotation.
 */
 struct BBox
 {
@@ -120,7 +120,7 @@ namespace openshot
 * and functions to manipulate it.
 *
 * The bounding-box displacement in X and Y directions, its width,
- * height and rotation variation over the frames are set as
+ * and height variation over the frames are set as
 * openshot::Keyframe objects.
 *
 * The bounding-box information over the clip's frames are
@@ -139,7 +139,6 @@
 Keyframe delta_y; ///< Y-direction displacement Keyframe
 Keyframe scale_x; ///< X-direction scale Keyframe
 Keyframe scale_y; ///< Y-direction scale Keyframe
- Keyframe rotation; ///< Rotation Keyframe
 Keyframe background_alpha; ///< Background box opacity
 Keyframe background_corner; ///< Radius of rounded corners
 Keyframe stroke_width; ///< Thickness of border line
diff --git a/src/TrackedObjectBase.cpp b/src/TrackedObjectBase.cpp
index 0fa4a05d5..818cb8686 100644
--- a/src/TrackedObjectBase.cpp
+++ b/src/TrackedObjectBase.cpp
@@ -23,7 +23,7 @@ namespace openshot
 // Constructor
 TrackedObjectBase::TrackedObjectBase(std::string _id)
- : visible(1.0), draw_box(1), id(_id) {}
+ : visible(1.0), draw_box(1), draw_text(1), id(_id) {}
 Json::Value TrackedObjectBase::add_property_choice_json(
 std::string name, int value, int selected_value) const
diff --git a/src/TrackedObjectBase.h b/src/TrackedObjectBase.h
index 58b312ed5..f10a5bee2 100644
--- a/src/TrackedObjectBase.h
+++ b/src/TrackedObjectBase.h
@@ -45,6 +45,9 @@ namespace openshot {
 /// Keyframe to determine if a specific box is drawn (or hidden)
 Keyframe draw_box;
+ /// Keyframe to determine if a specific object label is drawn (or hidden)
+ Keyframe draw_text;
+
 /// Default constructor
 TrackedObjectBase();
@@ -67,7 +70,7 @@ namespace openshot {
 /// Scale an object's property
 virtual void ScalePoints(double scale) { return; };
- /// Return the main properties of a TrackedObjectBBox instance - such as position, size and rotation
+ /// Return the main properties of a tracked object instance, such as position and size
 virtual std::map<std::string, float> GetBoxValues(int64_t frame_number) const { std::map<std::string, float> ret; return ret; };
 /// Add a bounding box to the tracked object's BoxVec map
 virtual void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) { return; };
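The new per-object draw_text keyframe composes with the effect-wide text toggle: a label is only rendered when both are enabled. A minimal usage sketch (standalone illustration, using only names introduced by this patch):

// Keep the box visible for one object while hiding only its label.
#include "TrackedObjectBBox.h"

void hide_label_but_keep_box(openshot::TrackedObjectBBox& obj)
{
    obj.draw_box = openshot::Keyframe(1.0);   // keep drawing the bounding box
    obj.draw_text = openshot::Keyframe(0.0);  // suppress this object's label only
}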
diff --git a/src/effects/ObjectDetection.cpp b/src/effects/ObjectDetection.cpp
index 8e156ef8b..e5f0b3401 100644
--- a/src/effects/ObjectDetection.cpp
+++ b/src/effects/ObjectDetection.cpp
@@ -31,6 +31,36 @@
 using namespace std;
 using namespace openshot;
+namespace {
+cv::Scalar default_class_color(const std::string& class_name, int index)
+{
+ const QString normalized = QString::fromStdString(class_name).trimmed().toLower();
+
+ // RGB values. Keep common object-detection classes on clear, saturated colors
+ // instead of the previous deterministic random palette.
+ if (normalized == "person") return cv::Scalar(83, 160, 237);
+ if (normalized == "car") return cv::Scalar(42, 200, 185);
+ if (normalized == "truck") return cv::Scalar(239, 126, 92);
+ if (normalized == "bus") return cv::Scalar(250, 196, 72);
+ if (normalized == "bicycle") return cv::Scalar(122, 201, 67);
+ if (normalized == "motorbike" || normalized == "motorcycle") return cv::Scalar(180, 126, 235);
+ if (normalized == "dog") return cv::Scalar(237, 92, 140);
+ if (normalized == "cat") return cv::Scalar(101, 214, 128);
+
+ static const cv::Scalar palette[] = {
+ cv::Scalar(83, 160, 237),
+ cv::Scalar(42, 200, 185),
+ cv::Scalar(239, 126, 92),
+ cv::Scalar(250, 196, 72),
+ cv::Scalar(122, 201, 67),
+ cv::Scalar(180, 126, 235),
+ cv::Scalar(237, 92, 140),
+ cv::Scalar(72, 190, 230),
+ };
+ return palette[index % (sizeof(palette) / sizeof(palette[0]))];
+}
+}
+
 // Default constructor
 ObjectDetection::ObjectDetection()
@@ -99,7 +129,6 @@ std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, i
 // Get properties of tracked object (i.e. colors, stroke width, etc...)
 std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
 std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
- int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
 float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
 float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
 float bg_corner = trackedObject->background_corner.GetValue(frame_number);
@@ -119,7 +148,7 @@ std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, i
 painter.drawRoundedRect(boxRect, bg_corner, bg_corner);
 }
- if(display_box_text.GetValue(frame_number) == 1) {
+ if(display_box_text.GetValue(frame_number) == 1 && trackedObject->draw_text.GetValue(frame_number) == 1) {
 // Draw text label above bounding box
 // Get the confidence and classId for the current detection
 int classId = detections.classIds.at(i);
@@ -170,18 +199,15 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath)
 // Clear out any old state
 classNames.clear();
+ classesColor.clear();
 detectionsData.clear();
 trackedObjects.clear();
 // Seed colors for each class
- std::srand(1);
 for (int i = 0; i < objMessage.classnames_size(); ++i) {
- classNames.push_back(objMessage.classnames(i));
- classesColor.push_back(cv::Scalar(
- std::rand() % 205 + 50,
- std::rand() % 205 + 50,
- std::rand() % 205 + 50
- ));
+ const std::string class_name = objMessage.classnames(i);
+ classNames.push_back(class_name);
+ classesColor.push_back(default_class_color(class_name, i));
 }
 // Walk every frame in the protobuf
diff --git a/src/effects/Tracker.cpp b/src/effects/Tracker.cpp
index 3cface1e5..c4060ba42 100644
--- a/src/effects/Tracker.cpp
+++ b/src/effects/Tracker.cpp
@@ -100,7 +100,6 @@ std::shared_ptr<Frame> Tracker::GetFrame(std::shared_ptr<Frame> frame, int64_t f
 if (trackedData->draw_box.GetValue(frame_number) == 1)
 {
 auto stroke_rgba = trackedData->stroke.GetColorRGBA(frame_number);
- int stroke_width = trackedData->stroke_width.GetValue(frame_number);
 float stroke_alpha = trackedData->stroke_alpha.GetValue(frame_number);
 auto bg_rgba = trackedData->background.GetColorRGBA(frame_number);
 float bg_alpha = trackedData->background_alpha.GetValue(frame_number);
diff --git a/tests/KeyFrame.cpp b/tests/KeyFrame.cpp
index f3353b1e1..04dc2b726 100644
--- a/tests/KeyFrame.cpp
+++ b/tests/KeyFrame.cpp
@@ -584,10 +584,10 @@ TEST_CASE( "TrackedObjectBBox init", "[libopenshot][keyframe]" )
 CHECK(kfb.scale_x.GetInt(1) == 1);
 CHECK(kfb.scale_y.GetInt(1) == 1);
- CHECK(kfb.rotation.GetInt(1) == 0);
-
 CHECK(kfb.stroke_width.GetInt(1) == 2);
 CHECK(kfb.stroke_alpha.GetValue(1) == Approx(0.7f).margin(0.0001));
+ CHECK(kfb.draw_box.GetInt(1) == 1);
+ CHECK(kfb.draw_text.GetInt(1) == 1);
 CHECK(kfb.background_alpha .GetInt(1) == 0);
 CHECK(kfb.background_corner.GetInt(1) == 12);

From be99ec02c6c950ab4fd946fa7d20696cc17df09e Mon Sep 17 00:00:00 2001
From: Jonathan Thomas
Date: Sun, 10 May 2026 21:48:24 -0500
Subject: [PATCH 02/14] Migrate object detection to YOLOv5 ONNX

- Replace YOLOv3 Darknet loading with YOLOv5 ONNX model loading and validation.
- Parse YOLOv5 outputs with top class candidates for smoother labels.
- Improve SORT tracking with class-score smoothing and stricter geometry gates.
- Prevent active tracks from hopping to adjacent objects or tiny nested detections.
- Keep object ids stable through protobuf load/save and add an object-detection debug example.
- Add class-based default colors and matching border/background defaults.
- Add All Objects pseudo-selection support for tracked-object properties.
- Honor tracked-object corner radius when using tracker/object detector masks.
- Add regression tests for tracking stability, all-object styling, masks, and keyframes.
---
 examples/Example_opencv.cpp       |  15 +-
 src/CVObjectDetection.cpp         | 238 +++++++++++++++++++++---------
 src/CVObjectDetection.h           |   7 +-
 src/EffectBase.cpp                |   7 +-
 src/TrackedObjectBBox.cpp         |   2 +-
 src/effects/ObjectDetection.cpp   |  78 ++++++++--
 src/effects/ObjectDetection.h     |   9 +-
 src/sort_filter/KalmanTracker.cpp |  37 +++++
 src/sort_filter/KalmanTracker.h   |  18 ++-
 src/sort_filter/sort.cpp          | 220 +++++++++++++++++----------
 src/sort_filter/sort.hpp          |   5 +-
 tests/CVObjectDetection.cpp       |   5 +-
 tests/CVTracker.cpp               | 162 ++++++++++++++++++++
 tests/EffectMask.cpp              | 136 +++++++++++++++++
 tests/KeyFrame.cpp                |   6 +-
 15 files changed, 761 insertions(+), 184 deletions(-)

diff --git a/examples/Example_opencv.cpp b/examples/Example_opencv.cpp
index 5b9adb1e0..e621551af 100644
--- a/examples/Example_opencv.cpp
+++ b/examples/Example_opencv.cpp
@@ -261,24 +261,21 @@ string objectDetectionJson(bool onlyProtoPath){
 string protobufDataPath = "example_object_detection.data";
 // Define processing device
 string processingDevice = "GPU";
- // Set path to model configuration file
- string modelConfiguration = "yolov3.cfg";
- // Set path to model weights
- string modelWeights = "yolov3.weights";
+ // Set path to YOLOv5 ONNX model
+ string model = "Yolo5/yolov5s.onnx";
 // Set path to class names file
- string classesFile = "obj.names";
+ string classesFile = "Yolo5/obj.names";
 // Construct all the composition of the JSON string
 string protobuf_data_path = jsonFormat("protobuf_data_path", protobufDataPath);
 string processing_device = jsonFormat("processing_device", processingDevice);
- string model_configuration = jsonFormat("model_configuration", modelConfiguration);
- string model_weights = jsonFormat("model_weights", modelWeights);
+ string model_path = jsonFormat("model", model);
 string classes_file = jsonFormat("classes_file", classesFile);
 // Return only the protobuf path in JSON format
 if(onlyProtoPath)
 return "{" + protobuf_data_path + "}";
 else
- return "{" + protobuf_data_path + "," + processing_device + "," + model_configuration + ","
- + model_weights + "," + classes_file + "}";
+ return "{" + protobuf_data_path + "," + processing_device + "," + model_path + ","
+ + classes_file + "}";
 }
diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp
index 2b3cc8e09..6ed7205aa 100644
--- a/src/CVObjectDetection.cpp
+++ b/src/CVObjectDetection.cpp
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include "CVObjectDetection.h"
 #include "Exceptions.h"
@@ -26,10 +27,10 @@
 using namespace openshot;
 using google::protobuf::util::TimeUtil;
 CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
-: processingController(&processingController), processingDevice("CPU"){
- SetJson(processInfoJson);
- confThreshold = 0.5;
+: processingController(&processingController), processingDevice("CPU"), inpWidth(640), inpHeight(640){
+ confThreshold = 0.25;
 nmsThreshold = 0.1;
+ SetJson(processInfoJson);
 }
 void CVObjectDetection::setProcessingDevice(){
@@ -56,15 +57,48 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start,
 processingController->SetError(false, "");
+ if(modelPath.empty()) {
+ processingController->SetError(true, "Missing path to YOLOv5 ONNX model file");
+ error = true;
+ return;
+ }
+ if(classesFile.empty()) {
+ processingController->SetError(true, "Missing path to class name file");
+ error = true;
+ return;
+ }
+
+ std::ifstream model_file(modelPath);
+ if(!model_file.good()){
+ processingController->SetError(true, "Incorrect path to YOLOv5 ONNX model file");
+ error = true;
+ return;
+ }
+ std::ifstream classes_file(classesFile);
+ if(!classes_file.good()){
+ processingController->SetError(true, "Incorrect path to class name file");
+ error = true;
+ return;
+ }
+
 // Load names of classes
- std::ifstream ifs(classesFile.c_str());
+ classNames.clear();
 std::string line;
- while (std::getline(ifs, line)) classNames.push_back(line);
+ while (std::getline(classes_file, line)) classNames.push_back(line);
 // Load the network
- if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
+ try {
+ net = cv::dnn::readNetFromONNX(modelPath);
+ } catch (const cv::Exception& e) {
+ std::string error_text = std::string("Failed to load model: ") + e.what();
+ if (error_text.find("Unsupported data type: FLOAT16") != std::string::npos) {
+ error_text = "Failed to load ONNX model: FLOAT16 is not supported by this OpenCV build. "
+ "Please use an FP32 ONNX model.";
+ }
+ processingController->SetError(true, error_text);
+ error = true;
 return;
- net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
+ }
 setProcessingDevice();
 size_t frame_number;
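The same validation is easy to reproduce outside the effect when debugging a model file. A minimal standalone checker under the same assumptions (an OpenCV build with the DNN module; the program name is illustrative):

// check_model.cpp - verify an ONNX file loads in OpenCV DNN.
#include <opencv2/dnn.hpp>
#include <iostream>

int main(int argc, char** argv)
{
    if (argc < 2) { std::cerr << "usage: check_model <model.onnx>\n"; return 1; }
    try {
        cv::dnn::Net net = cv::dnn::readNetFromONNX(argv[1]);
        std::cout << "Model loaded OK\n";
    } catch (const cv::Exception& e) {
        // FP16 ONNX exports fail on some OpenCV builds; re-export as FP32.
        std::cerr << "Failed to load model: " << e.what() << "\n";
        return 1;
    }
    return 0;
}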
@@ -99,17 +133,19 @@ void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
 cv::Mat blob;
 // Create a 4D blob from the frame.
- int inpWidth, inpHeight;
- inpWidth = inpHeight = 416;
 cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);
- //Sets the input to the network
- net.setInput(blob);
-
- // Runs the forward pass to get output of the output layers
 std::vector<cv::Mat> outs;
- net.forward(outs, getOutputsNames(net));
+ try {
+ // Sets the input to the network
+ net.setInput(blob);
+ // Runs the forward pass to get output of the output layers
+ net.forward(outs, getOutputsNames(net));
+ } catch (const cv::Exception& e) {
+ processingController->SetError(true, std::string("Object detection inference failed: ") + e.what());
+ error = true;
+ return;
+ }
 // Remove the bounding boxes with low confidence
 postprocess(frame.size(), outs, frameId);
@@ -123,33 +159,71 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
 std::vector<int> classIds;
 std::vector<float> confidences;
 std::vector<cv::Rect> boxes;
+ std::vector<std::vector<ClassScore>> detectionClassScores;
 std::vector<int> objectIds;
+ const int maxClassCandidates = 5;
+
+ for (size_t i = 0; i < outs.size(); ++i) {
+ cv::Mat det = outs[i];
+
+ // YOLOv5 ONNX output is usually [1, num_boxes, num_classes + 5].
+ if (det.dims == 3) {
+ det = det.reshape(1, det.size[1]);
+ }
+ if (det.dims != 2 || det.cols < 6) {
+ continue;
+ }
+
+ const float xFactor = static_cast<float>(frameDims.width) / static_cast<float>(inpWidth);
+ const float yFactor = static_cast<float>(frameDims.height) / static_cast<float>(inpHeight);
+
+ float* data = reinterpret_cast<float*>(det.data);
+ for (int j = 0; j < det.rows; ++j, data += det.cols) {
+ std::vector<ClassScore> rowClassScores;
+ rowClassScores.reserve(maxClassCandidates);
+ for (int classIndex = 5; classIndex < det.cols; ++classIndex) {
+ const float classConfidence = data[classIndex] * data[4];
+ if (rowClassScores.size() < static_cast<size_t>(maxClassCandidates)) {
+ rowClassScores.emplace_back(classIndex - 5, classConfidence);
+ std::sort(rowClassScores.begin(), rowClassScores.end(),
+ [](const ClassScore& a, const ClassScore& b) { return a.score > b.score; });
+ } else if (classConfidence > rowClassScores.back().score) {
+ rowClassScores.back() = ClassScore(classIndex - 5, classConfidence);
+ std::sort(rowClassScores.begin(), rowClassScores.end(),
+ [](const ClassScore& a, const ClassScore& b) { return a.score > b.score; });
+ }
+ }
+ if (rowClassScores.empty()) {
+ continue;
+ }
+
+ float confidence = rowClassScores.front().score;
+
+ if (confidence > confThreshold) {
+ int centerX = 0;
+ int centerY = 0;
+ int width = 0;
+ int height = 0;
+
+ if (data[0] > 1.0f || data[1] > 1.0f || data[2] > 1.0f || data[3] > 1.0f) {
+ centerX = static_cast<int>(data[0] * xFactor);
+ centerY = static_cast<int>(data[1] * yFactor);
+ width = static_cast<int>(data[2] * xFactor);
+ height = static_cast<int>(data[3] * yFactor);
+ } else {
+ centerX = static_cast<int>(data[0] * frameDims.width);
+ centerY = static_cast<int>(data[1] * frameDims.height);
+ width = static_cast<int>(data[2] * frameDims.width);
+ height = static_cast<int>(data[3] * frameDims.height);
+ }
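For reference, this is the shape of the decode step in isolation — a sketch assuming the common [num_boxes, 5 + num_classes] YOLOv5 layout; Detection and decode_row are illustrative names, not part of the patch:

// d[0..3] = cx, cy, w, h; d[4] = objectness; d[5..cols) = class probabilities.
// Final per-class confidence is objectness * class probability.
#include <opencv2/core.hpp>

struct Detection { cv::Rect box; int class_id; float confidence; };

Detection decode_row(const float* d, int cols, float x_factor, float y_factor)
{
    int best = 0;
    float best_score = 0.0f;
    for (int c = 5; c < cols; ++c) {
        const float score = d[4] * d[c];  // objectness * class probability
        if (score > best_score) { best_score = score; best = c - 5; }
    }
    // Recent exports emit box terms in model-input pixels (hence the
    // "> 1.0" heuristic above); older exports emit normalized values.
    const int cx = static_cast<int>(d[0] * x_factor);
    const int cy = static_cast<int>(d[1] * y_factor);
    const int w  = static_cast<int>(d[2] * x_factor);
    const int h  = static_cast<int>(d[3] * y_factor);
    return { cv::Rect(cx - w / 2, cy - h / 2, w, h), best, best_score };
}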
- for (size_t i = 0; i < outs.size(); ++i)
- {
- // Scan through all the bounding boxes output from the network and keep only the
- // ones with high confidence scores. Assign the box's class label as the class
- // with the highest score for the box.
- float* data = (float*)outs[i].data;
- for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
- {
- cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
- cv::Point classIdPoint;
- double confidence;
- // Get the value and location of the maximum score
- cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
- if (confidence > confThreshold)
- {
- int centerX = (int)(data[0] * frameDims.width);
- int centerY = (int)(data[1] * frameDims.height);
- int width = (int)(data[2] * frameDims.width);
- int height = (int)(data[3] * frameDims.height);
 int left = centerX - width / 2;
 int top = centerY - height / 2;
- classIds.push_back(classIdPoint.x);
- confidences.push_back((float)confidence);
+ classIds.push_back(rowClassScores.front().classId);
+ confidences.push_back(confidence);
 boxes.push_back(cv::Rect(left, top, width, height));
+ detectionClassScores.push_back(rowClassScores);
 }
 }
 }
@@ -161,9 +235,16 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
 // Pass boxes to SORT algorithm
 std::vector<cv::Rect> sortBoxes;
- for(auto box : boxes)
- sortBoxes.push_back(box);
- sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);
+ std::vector<float> sortConfidences;
+ std::vector<int> sortClassIds;
+ std::vector<std::vector<ClassScore>> sortClassScores;
+ for(auto index : indices) {
+ sortBoxes.push_back(boxes[index]);
+ sortConfidences.push_back(confidences[index]);
+ sortClassIds.push_back(classIds[index]);
+ sortClassScores.push_back(detectionClassScores[index]);
+ }
+ sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), sortConfidences, sortClassIds, sortClassScores);
 // Clear data vectors
 boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
@@ -180,8 +261,8 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector
 // Remove boxes based on centroids distance
 for(uint i = 0; i CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
 {
- static std::vector<cv::String> names;
-
 //Get the indices of the output layers, i.e. the layers with unconnected outputs
 std::vector<int> outLayers = net.getUnconnectedOutLayers();
@@ -281,6 +360,7 @@ std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& n
 std::vector<cv::String> layersNames = net.getLayerNames();
 // Get the names of the output layers in names
+ std::vector<cv::String> names;
 names.resize(outLayers.size());
 for (size_t i = 0; i < outLayers.size(); ++i)
 names[i] = layersNames[outLayers[i] - 1];
@@ -299,6 +379,11 @@ CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){
 }
 bool CVObjectDetection::SaveObjDetectedData(){
+ if(protobuf_data_path.empty()) {
+ cerr << "Missing path to object detection protobuf data file."
 << endl;
+ return false;
+ }
+
 // Create tracker message
 pb_objdetect::ObjDetect objMessage;
@@ -311,7 +396,6 @@ bool CVObjectDetection::SaveObjDetectedData(){
 // Iterate over all frames data and save in protobuf message
 for(std::map<size_t, CVDetectionData>::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){
 CVDetectionData dData = it->second;
- pb_objdetect::Frame* pbFrameData;
 AddFrameDataToProto(objMessage.add_frame(), dData);
 }
@@ -380,37 +464,49 @@ void CVObjectDetection::SetJsonValue(const Json::Value root) {
 if (!root["protobuf_data_path"].isNull()){
 protobuf_data_path = (root["protobuf_data_path"].asString());
 }
+
 if (!root["processing-device"].isNull()){
 processingDevice = (root["processing-device"].asString());
 }
- if (!root["model-config"].isNull()){
- modelConfiguration = (root["model-config"].asString());
- std::ifstream infile(modelConfiguration);
- if(!infile.good()){
- processingController->SetError(true, "Incorrect path to model config file");
- error = true;
- }
-
- }
- if (!root["model-weights"].isNull()){
- modelWeights= (root["model-weights"].asString());
- std::ifstream infile(modelWeights);
- if(!infile.good()){
- processingController->SetError(true, "Incorrect path to model weight file");
- error = true;
- }
-
+ if (!root["processing_device"].isNull()){
+ processingDevice = (root["processing_device"].asString());
 }
 if (!root["class-names"].isNull()){
 classesFile = (root["class-names"].asString());
-
- std::ifstream infile(classesFile);
- if(!infile.good()){
- processingController->SetError(true, "Incorrect path to class name file");
- error = true;
- }
-
 }
+ if (!root["classes_file"].isNull()){
+ classesFile = (root["classes_file"].asString());
+ }
+ if (!root["model"].isNull()){
+ modelPath = (root["model"].asString());
+ }
+ if (!root["model_path"].isNull()){
+ modelPath = (root["model_path"].asString());
+ }
+ if (!root["input-width"].isNull()){
+ inpWidth = root["input-width"].asInt();
+ }
+ if (!root["input_width"].isNull()){
+ inpWidth = root["input_width"].asInt();
+ }
+ if (!root["input-height"].isNull()){
+ inpHeight = root["input-height"].asInt();
+ }
+ if (!root["input_height"].isNull()){
+ inpHeight = root["input_height"].asInt();
+ }
+ if (!root["confidence-threshold"].isNull()){
+ confThreshold = root["confidence-threshold"].asFloat();
+ }
+ if (!root["confidence_threshold"].isNull()){
+ confThreshold = root["confidence_threshold"].asFloat();
+ }
+ if (!root["nms-threshold"].isNull()){
+ nmsThreshold = root["nms-threshold"].asFloat();
+ }
+ if (!root["nms_threshold"].isNull()){
+ nmsThreshold = root["nms_threshold"].asFloat();
+ }
 }
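Both the hyphenated and snake_case spellings of each key are honored by the updated SetJsonValue, so a config blob along these lines works (paths are placeholders; values match the new defaults):

{
  "protobuf_data_path": "example_object_detection.data",
  "processing_device": "GPU",
  "model": "Yolo5/yolov5s.onnx",
  "classes_file": "Yolo5/obj.names",
  "input_width": 640,
  "input_height": 640,
  "confidence_threshold": 0.25,
  "nms_threshold": 0.1
}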
 /* ... */

 // Load protobuf data file
 bool CVObjectDetection::_LoadObjDetectdData(){
+ if(protobuf_data_path.empty()) {
+ cerr << "Missing path to object detection protobuf data file." << endl;
+ return false;
+ }
+
 // Create tracker message
 pb_objdetect::ObjDetect objMessage;
@@ -472,6 +573,7 @@ bool CVObjectDetection::_LoadObjDetectdData(){
 // Push back data into vectors
 boxes.push_back(box);
 classIds.push_back(classId);
 confidences.push_back(confidence);
+ objectIds.push_back(objectId);
 }
 // Assign data to object detector map
diff --git a/src/CVObjectDetection.h b/src/CVObjectDetection.h
index 3982a33ce..481e107ab 100644
--- a/src/CVObjectDetection.h
+++ b/src/CVObjectDetection.h
@@ -59,7 +59,7 @@ namespace openshot
 /**
 * @brief This class runs through a clip to detect objects and returns the bounding boxes and their properties.
 *
- * Object detection is performed using YoloV3 model with OpenCV DNN module
+ * Object detection is performed using a YOLOv5 ONNX model with OpenCV DNN module.
 */
 class CVObjectDetection{
@@ -70,10 +70,11 @@ namespace openshot
 float confThreshold, nmsThreshold;
 std::string classesFile;
- std::string modelConfiguration;
- std::string modelWeights;
+ std::string modelPath;
 std::string processingDevice;
 std::string protobuf_data_path;
+ int inpWidth;
+ int inpHeight;
 SortTracker sort;
diff --git a/src/EffectBase.cpp b/src/EffectBase.cpp
index 42080e126..1f2cac0b7 100644
--- a/src/EffectBase.cpp
+++ b/src/EffectBase.cpp
@@ -544,7 +544,7 @@ std::shared_ptr<QImage> EffectBase::TrackedObjectMask(std::shared_ptr<QImage> ta
 mask_image->fill(QColor(0, 0, 0, 255));
 QPainter painter(mask_image.get());
- painter.setRenderHint(QPainter::Antialiasing, false);
+ painter.setRenderHint(QPainter::Antialiasing, true);
 painter.setPen(Qt::NoPen);
 painter.setBrush(QBrush(QColor(255, 255, 255, 255)));
@@ -564,16 +564,17 @@ std::shared_ptr<QImage> EffectBase::TrackedObjectMask(std::shared_ptr<QImage> ta
 const double y = (box.cy - box.height / 2.0) * target_image->height();
 const double w = box.width * target_image->width();
 const double h = box.height * target_image->height();
+ const double corner = bbox->background_corner.GetValue(frame_number);
 QRectF rect(x, y, w, h);
 if (std::abs(box.angle) > 0.0001f) {
 painter.save();
 painter.translate(rect.center());
 painter.rotate(box.angle);
- painter.drawRect(QRectF(-w / 2.0, -h / 2.0, w, h));
+ painter.drawRoundedRect(QRectF(-w / 2.0, -h / 2.0, w, h), corner, corner);
 painter.restore();
 } else {
- painter.drawRect(rect);
+ painter.drawRoundedRect(rect, corner, corner);
 }
 drew_any_box = true;
 }
diff --git a/src/TrackedObjectBBox.cpp b/src/TrackedObjectBBox.cpp
index db506079b..80c77f251 100644
--- a/src/TrackedObjectBBox.cpp
+++ b/src/TrackedObjectBBox.cpp
@@ -39,7 +39,7 @@ TrackedObjectBBox::TrackedObjectBBox(int Red, int Green, int Blue, int Alfa)
 background_alpha(0.0), background_corner(12),
 stroke_width(2) , stroke_alpha(0.7),
 stroke(Red, Green, Blue, Alfa),
- background(0, 0, 255, Alfa)
+ background(Red, Green, Blue, Alfa)
 {
 this->TimeScale = 1.0;
 }
diff --git a/src/effects/ObjectDetection.cpp b/src/effects/ObjectDetection.cpp
index e5f0b3401..d3c4a4c14 100644
--- a/src/effects/ObjectDetection.cpp
+++ b/src/effects/ObjectDetection.cpp
@@ -32,6 +32,22 @@
 using namespace std;
 using namespace openshot;
 namespace {
+bool is_all_objects_key(const std::string& name)
+{
+ const QString normalized = QString::fromStdString(name).trimmed().toLower();
+ return normalized == "all" || normalized == "*" || normalized == "-1";
+}
+
+std::shared_ptr<TrackedObjectBBox> make_all_objects_properties(
+ const std::shared_ptr<TrackedObjectBase>& source)
+{
+ auto properties = std::make_shared<TrackedObjectBBox>();
+ if (source)
+ properties->SetJsonValue(source->JsonValue());
+ properties->Id("All Objects");
+ return properties;
+}
+
 cv::Scalar default_class_color(const std::string& class_name, int index)
 {
@@ -249,6 +265,7 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath)
 /*alpha=*/0
 );
 tmpObj.stroke_alpha = Keyframe(1.0);
+ tmpObj.background_alpha = Keyframe(0.15);
 tmpObj.AddBox(frameId, x + w/2, y + h/2, w, h, 0.0);
 auto ptr = std::make_shared<TrackedObjectBBox>(tmpObj);
@@ -271,9 +288,9 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath)
 google::protobuf::ShutdownProtobufLibrary();
- // Finally, pick a default selectedObjectIndex if we have any
+ // Default to the pseudo-selection that edits every tracked object.
 if (!trackedObjects.empty()) {
- selectedObjectIndex = trackedObjects.begin()->first;
+ selectedObjectIndex = -1;
 }
 return true;
@@ -294,6 +311,12 @@ std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
 }
 DetectionData detections = detectionsData.at(frame_number);
+ if (!trackedObjects.empty()) {
+ root["visible_objects_index"].append(-1);
+ root["visible_objects_id"].append("All Objects");
+ root["visible_class_names"].append("All Objects");
+ }
+
 // Iterate through the tracked objects
 for(int i = 0; i ObjectDetection::TrackedObjectMask(std::shared_ptrfill(QColor(0, 0, 0, 255));
 QPainter painter(mask_image.get());
- painter.setRenderHint(QPainter::Antialiasing, false);
+ painter.setRenderHint(QPainter::Antialiasing, true);
 painter.setPen(Qt::NoPen);
 painter.setBrush(QBrush(QColor(255, 255, 255, 255)));
@@ -387,7 +410,8 @@ std::shared_ptr ObjectDetection::TrackedObjectMask(std::shared_ptrheight();
 const double w = box.width * target_image->width();
 const double h = box.height * target_image->height();
- painter.drawRect(QRectF(x, y, w, h));
+ const double corner = tracked_object->background_corner.GetValue(frame_number);
+ painter.drawRoundedRect(QRectF(x, y, w, h), corner, corner);
 drew_any_box = true;
 }
@@ -457,6 +481,7 @@ void ObjectDetection::SetJsonValue(const Json::Value root)
 std::string new_path = root["protobuf_data_path"].asString();
 if (protobuf_data_path != new_path || trackedObjects.empty()) {
 protobuf_data_path = new_path;
+ allObjectsProperties.reset();
 if (!LoadObjDetectdData(protobuf_data_path)) {
 throw InvalidFile("Invalid protobuf data path", "");
 }
@@ -491,6 +516,25 @@ void ObjectDetection::SetJsonValue(const Json::Value root)
 const auto memberNames = root["objects"].getMemberNames();
 for (const auto& name : memberNames)
 {
+ if (is_all_objects_key(name)) {
+ if (!allObjectsProperties) {
+ std::shared_ptr<TrackedObjectBase> firstObject;
+ if (!trackedObjects.empty())
+ firstObject = trackedObjects.begin()->second;
+ allObjectsProperties = make_all_objects_properties(firstObject);
+ }
+ allObjectsProperties->SetJsonValue(root["objects"][name]);
+ for (auto& trackedObject : trackedObjects) {
+ if (trackedObject.second)
+ trackedObject.second->SetJsonValue(root["objects"][name]);
+ }
+ }
+ }
+
+ for (const auto& name : memberNames)
+ {
+ if (is_all_objects_key(name))
+ continue;
 // Determine the numeric index of this object
 int index = -1;
 bool numeric_key = std::all_of(name.begin(), name.end(), ::isdigit);
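In practice the two passes compose: the "all" key is applied to every tracked object first, and explicit per-object keys then override it. Usage in the style of the regression tests below:

// Sparse update: style every object, then override one of them.
Json::Value update;
update["objects"]["all"]["stroke_width"] = Keyframe(6.0).JsonValue();
update["objects"]["effect-1"]["stroke_width"] = Keyframe(3.0).JsonValue();
effect.SetJsonValue(update);
// Every object now has stroke width 6, except "effect-1", which keeps 3.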
@@ -534,7 +578,19 @@ std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
 Json::Value root = BasePropertiesJSON(requested_frame);
 Json::Value objects;
- if(trackedObjects.count(selectedObjectIndex) != 0){
+ if(selectedObjectIndex == -1 && !trackedObjects.empty()){
+ auto selectedObject = allObjectsProperties ? allObjectsProperties : trackedObjects.begin()->second;
+ if (selectedObject){
+ Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
+ trackedObjectJSON["box_id"]["memo"] = "All Objects";
+ trackedObjectJSON.removeMember("x1");
+ trackedObjectJSON.removeMember("y1");
+ trackedObjectJSON.removeMember("x2");
+ trackedObjectJSON.removeMember("y2");
+ objects["all"] = trackedObjectJSON;
+ }
+ }
+ else if(trackedObjects.count(selectedObjectIndex) != 0){
 auto selectedObject = trackedObjects.at(selectedObjectIndex);
 if (selectedObject){
 Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
@@ -544,18 +600,10 @@ std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
 }
 root["objects"] = objects;
- root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
- root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
+ root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, -1, 200, false, requested_frame);
+ root["confidence_threshold"] = add_property_json("Confidence Threshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
 root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
- root["display_box_text"] = add_property_json("Draw All Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1, false, requested_frame);
- root["display_box_text"]["choices"].append(add_property_choice_json("Yes", true, display_box_text.GetValue(requested_frame)));
- root["display_box_text"]["choices"].append(add_property_choice_json("No", false, display_box_text.GetValue(requested_frame)));
-
- root["display_boxes"] = add_property_json("Draw All Boxes", display_boxes.GetValue(requested_frame), "int", "", &display_boxes, 0, 1, false, requested_frame);
- root["display_boxes"]["choices"].append(add_property_choice_json("Yes", true, display_boxes.GetValue(requested_frame)));
- root["display_boxes"]["choices"].append(add_property_choice_json("No", false, display_boxes.GetValue(requested_frame)));
-
 // Return formatted string
 return root.toStyledString();
 }
diff --git a/src/effects/ObjectDetection.h b/src/effects/ObjectDetection.h
index dcbb52792..f87786268 100644
--- a/src/effects/ObjectDetection.h
+++ b/src/effects/ObjectDetection.h
@@ -23,6 +23,10 @@
 #include "Json.h"
 #include "KeyFrame.h"
+namespace openshot {
+ class TrackedObjectBBox;
+}
+
 // Struct that stores the detected bounding boxes for all the clip frames
 struct DetectionData{
 DetectionData(){}
@@ -69,12 +73,15 @@ namespace openshot
 Keyframe display_boxes;
 /// Minimum confidence value to display the detected objects
- float confidence_threshold = 0.5;
+ float confidence_threshold = 0.25;
 /// Contain the user selected classes for visualization
 std::vector<std::string> display_classes;
 std::string class_filter;
+ /// Last explicit "All Objects" settings, used for stable UI readback
+ std::shared_ptr<TrackedObjectBBox> allObjectsProperties;
+
 /// Init effect settings
 void init_effect_details();
diff --git a/src/sort_filter/KalmanTracker.cpp b/src/sort_filter/KalmanTracker.cpp
index 1a50e4c3b..2552a0450 100644
--- a/src/sort_filter/KalmanTracker.cpp
+++ b/src/sort_filter/KalmanTracker.cpp
@@ -7,6 +7,7 @@
 #include "KalmanTracker.h"
 #include
+#include
 using namespace std;
 using namespace cv;
@@ -99,6 +100,42 @@ void KalmanTracker::update(
 // detect_times.push_back(dt);
 }
+void KalmanTracker::update_class_scores(const std::vector<ClassScore>& classScores, int fallbackClassId, float fallbackConfidence)
+{
+ const double decay = 0.82;
+ const double update_weight = 1.0 - decay;
+ const bool had_history = !classScoreHistory.empty();
+
+ for (auto it = classScoreHistory.begin(); it != classScoreHistory.end();) {
+ it->second *= decay;
+ if (it->second < 0.0001)
+ it = classScoreHistory.erase(it);
+ else
+ ++it;
+ }
+
+ const double candidate_weight = had_history ? update_weight : 1.0;
+ if (classScores.empty()) {
+ classScoreHistory[fallbackClassId] += fallbackConfidence * candidate_weight;
+ } else {
+ for (const auto& candidate : classScores) {
+ if (candidate.classId < 0 || candidate.score <= 0.0f)
+ continue;
+ classScoreHistory[candidate.classId] += candidate.score * candidate_weight;
+ }
+ }
+
+ if (classScoreHistory.empty()) {
+ classId = fallbackClassId;
+ return;
+ }
+
+ auto best = std::max_element(
+ classScoreHistory.begin(), classScoreHistory.end(),
+ [](const auto& a, const auto& b) { return a.second < b.second; });
+ classId = best->first;
+}
+
 // Return the current state vector
 StateType KalmanTracker::get_state()
 {
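A worked check of the decay arithmetic (standalone sketch mirroring update_class_scores): a track seeded at 0.9 for class 1 needs four consecutive frames that strongly favour class 2 before the label flips, which is exactly what the "KalmanTracker smooths class scores" test below asserts.

#include <map>
#include <cstdio>

int main()
{
    std::map<int, double> history = { {1, 0.9}, {2, 0.1} };  // seeded frame
    const double decay = 0.82, w = 1.0 - decay;
    for (int frame = 1; frame <= 4; ++frame) {
        for (auto& entry : history) entry.second *= decay;
        history[1] += 0.1 * w;  // class 1 now scores low...
        history[2] += 0.9 * w;  // ...class 2 scores high
        std::printf("frame %d: class1=%.3f class2=%.3f\n",
                    frame, history[1], history[2]);
    }
    // Frame 4 is the first time class 2 wins (0.538 vs 0.462).
}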
diff --git a/src/sort_filter/KalmanTracker.h b/src/sort_filter/KalmanTracker.h
index 4c55c4c06..c9f5e8b68 100644
--- a/src/sort_filter/KalmanTracker.h
+++ b/src/sort_filter/KalmanTracker.h
@@ -11,9 +11,20 @@
 #include "opencv2/video/tracking.hpp"
 #include "opencv2/highgui/highgui.hpp"
+#include
+#include
+
 #define StateType cv::Rect_<float>
+struct ClassScore
+{
+ int classId = 0;
+ float score = 0.0f;
+ ClassScore() {}
+ ClassScore(int _classId, float _score) : classId(_classId), score(_score) {}
+};
+
 /// This class represents the internal state of individual tracked objects observed as bounding box.
 class KalmanTracker
 {
@@ -27,7 +38,7 @@ class KalmanTracker
 m_age = 0;
 m_id = 0;
 }
- KalmanTracker(StateType initRect, float confidence, int classId, int objectId) : confidence(confidence), classId(classId)
+ KalmanTracker(StateType initRect, float confidence, int classId, int objectId, const std::vector<ClassScore>& classScores = {}) : confidence(confidence), classId(classId)
 {
 init_kf(initRect);
 m_time_since_update = 0;
@@ -35,6 +46,7 @@ class KalmanTracker
 m_hit_streak = 0;
 m_age = 0;
 m_id = objectId;
+ update_class_scores(classScores, classId, confidence);
 }
 ~KalmanTracker()
 {
@@ -45,6 +57,7 @@ class KalmanTracker
 StateType predict();
 StateType predict2();
 void update(StateType stateMat);
+ void update_class_scores(const std::vector<ClassScore>& classScores, int fallbackClassId, float fallbackConfidence);
 StateType get_state();
 StateType get_rect_xysr(float cx, float cy, float s, float r);
@@ -56,6 +69,7 @@ class KalmanTracker
 int m_id;
 float confidence;
 int classId;
+ std::map<int, double> classScoreHistory;
 private:
 void init_kf(StateType stateMat);
@@ -66,4 +80,4 @@ class KalmanTracker
 std::vector<StateType> m_history;
 };
-#endif
\ No newline at end of file
+#endif
diff --git a/src/sort_filter/sort.cpp b/src/sort_filter/sort.cpp
index 78ae24320..d89673f03 100644
--- a/src/sort_filter/sort.cpp
+++ b/src/sort_filter/sort.cpp
@@ -3,9 +3,70 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
 #include "sort.hpp"
+#include
 using namespace std;
+namespace {
+double box_diagonal(const cv::Rect_<float>& box)
+{
+ return std::sqrt(box.width * box.width + box.height * box.height);
+}
+
+double box_area(const cv::Rect_<float>& box)
+{
+ return std::max(0.0f, box.width) * std::max(0.0f, box.height);
+}
+
+double aspect_ratio(const cv::Rect_<float>& box)
+{
+ if (box.width <= 0.0f || box.height <= 0.0f)
+ return 0.0;
+ return static_cast<double>(box.width) / static_cast<double>(box.height);
+}
+
+bool box_shape_matches(const cv::Rect_<float>& predicted_box, const cv::Rect_<float>& detection_box, double iou)
+{
+ const double predicted_area = box_area(predicted_box);
+ const double detection_area = box_area(detection_box);
+ if (predicted_area <= 1.0 || detection_area <= 1.0)
+ return false;
+
+ const double area_ratio = detection_area / predicted_area;
+ const double predicted_aspect = aspect_ratio(predicted_box);
+ const double detection_aspect = aspect_ratio(detection_box);
+ const double aspect_ratio_delta = (predicted_aspect > 0.0 && detection_aspect > 0.0)
+ ? std::max(predicted_aspect / detection_aspect, detection_aspect / predicted_aspect)
+ : 999.0;
+
+ if (iou >= 0.70)
+ return area_ratio >= 0.20 && area_ratio <= 5.00 && aspect_ratio_delta <= 4.00;
+ return area_ratio >= 0.35 && area_ratio <= 2.80 && aspect_ratio_delta <= 2.75;
+}
+
+bool detection_matches_track_gate(
+ const KalmanTracker& tracker,
+ const cv::Rect_<float>& predicted_box,
+ const TrackingBox& detection,
+ double iou,
+ double centroid_distance)
+{
+ if (!box_shape_matches(predicted_box, detection.box, iou))
+ return false;
+
+ const double scale = std::max(box_diagonal(predicted_box), box_diagonal(detection.box));
+ const bool missed_previous_frame = tracker.m_time_since_update > 1;
+
+ if (missed_previous_frame) {
+ const double reacquire_distance = std::max(12.0, scale * 0.25);
+ return iou >= 0.35 || centroid_distance <= reacquire_distance;
+ }
+
+ const double local_distance = std::max(12.0, scale * 0.22);
+ return iou >= 0.20 || centroid_distance <= local_distance;
+}
+}
+
 // Constructor
 SortTracker::SortTracker(int max_age, int min_hits, int max_missed, double min_iou, double nms_iou_thresh, double min_conf)
 {
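A standalone numeric walk-through of the reacquire gate, using the geometry from the "does not reacquire a missed track" regression test further down: a coasting track predicts a 60x60 box at (100,100), and a new 60x60 detection appears 45 px lower at (100,145).

#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    const double scale = std::sqrt(60.0 * 60.0 + 60.0 * 60.0);      // ~84.9
    const double reacquire_distance = std::max(12.0, scale * 0.25); // ~21.2 px
    const double centroid_distance = 45.0;                          // too far
    const double iou = (60.0 * 15.0) / (2 * 3600.0 - 900.0);        // ~0.14 < 0.35
    const bool reacquired = iou >= 0.35 || centroid_distance <= reacquire_distance;
    std::printf("reacquired=%d\n", reacquired);  // prints 0: a fresh track starts
}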
@@ -64,8 +125,9 @@ void apply_nms(vector<TrackingBox>& detections, double nms_iou_thresh) {
 for (size_t j = i + 1; j < detections.size(); ++j) {
 if (suppressed[j])
 continue;
- if (detections[i].classId == detections[j].classId &&
- SortTracker::GetIOU(detections[i].box, detections[j].box) > nms_iou_thresh) {
+ double iou = SortTracker::GetIOU(detections[i].box, detections[j].box);
+ if ((detections[i].classId == detections[j].classId && iou > nms_iou_thresh) ||
+ iou > 0.85) {
 suppressed[j] = true;
 }
 }
@@ -81,9 +143,12 @@ void apply_nms(vector<TrackingBox>& detections, double nms_iou_thresh) {
 detections = filtered;
 }
-void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double image_diagonal, std::vector<float> confidences, std::vector<int> classIds)
+void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double image_diagonal, std::vector<float> confidences, std::vector<int> classIds, std::vector<std::vector<ClassScore>> classScores)
 {
 vector<TrackingBox> detections;
+ dead_trackers_id.clear();
+ if (classScores.size() != detections_cv.size())
+ classScores.resize(detections_cv.size());
 if (trackers.size() == 0) // the first frame met
 {
 alive_tracker = false;
@@ -97,9 +162,15 @@ void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double
 tb.box = cv::Rect_<float>(detections_cv[i]);
 tb.classId = classIds[i];
 tb.confidence = confidences[i];
+ tb.classScores = classScores[i];
 detections.push_back(tb);
+ }
- KalmanTracker trk = KalmanTracker(detections.back().box, detections.back().confidence, detections.back().classId, _next_id++);
+ apply_nms(detections, _nms_iou_thresh);
+
+ for (const auto& detection : detections)
+ {
+ KalmanTracker trk = KalmanTracker(detection.box, detection.confidence, detection.classId, _next_id++, detection.classScores);
 trackers.push_back(trk);
 }
 return;
@@ -114,20 +185,12 @@ void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double
 tb.box = cv::Rect_<float>(detections_cv[i]);
 tb.classId = classIds[i];
 tb.confidence = confidences[i];
+ tb.classScores = classScores[i];
 detections.push_back(tb);
 }
 // Apply NMS to remove duplicates
 apply_nms(detections, _nms_iou_thresh);
-
- for (auto it = frameTrackingResult.begin(); it != frameTrackingResult.end(); it++)
- {
- int frame_age = frame_count - it->frame;
- if (frame_age >= _max_age || frame_age < 0)
- {
- dead_trackers_id.push_back(it->id);
- }
- }
 }
 ///////////////////////////////////////
@@ -150,67 +213,86 @@ void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double
 cost_matrix.clear();
 cost_matrix.resize(trkNum, vector<double>(detNum, 0));
-
- for (unsigned int i = 0; i < trkNum; i++) // compute cost matrix using 1 - IOU with gating
- {
- for (unsigned int j = 0; j < detNum; j++)
- {
- double iou = GetIOU(predictedBoxes[i], detections[j].box);
- double dist = GetCentroidsDistance(predictedBoxes[i], detections[j].box) / image_diagonal;
- if (trackers[i].classId != detections[j].classId || dist > max_centroid_dist_norm)
- {
- cost_matrix[i][j] = 1e9; // large cost for gating
- }
- else
- {
- cost_matrix[i][j] = 1 - iou + (1 - detections[j].confidence) * 0.1; // slight penalty for low conf
- }
- }
- }
-
- HungarianAlgorithm HungAlgo;
 assignment.clear();
- HungAlgo.Solve(cost_matrix, assignment);
- // find matches, unmatched_detections and unmatched_predictions
+ matchedPairs.clear();
 unmatchedTrajectories.clear();
 unmatchedDetections.clear();
 allItems.clear();
 matchedItems.clear();
- if (detNum > trkNum) // there are unmatched detections
+ if (trkNum == 0)
 {
- for (unsigned int n = 0; n < detNum; n++)
- allItems.insert(n);
-
- for (unsigned int i = 0; i < trkNum; ++i)
- matchedItems.insert(assignment[i]);
-
- set_difference(allItems.begin(), allItems.end(),
- matchedItems.begin(), matchedItems.end(),
- insert_iterator<set<int>>(unmatchedDetections, unmatchedDetections.begin()));
+ for (auto& detection : detections)
+ {
+ KalmanTracker tracker = KalmanTracker(detection.box, detection.confidence, detection.classId, _next_id++, detection.classScores);
+ trackers.push_back(tracker);
+ }
 }
- else if (detNum < trkNum) // there are unmatched trajectory/predictions
+ else if (detNum == 0)
 {
 for (unsigned int i = 0; i < trkNum; ++i)
- if (assignment[i] == -1) // unassigned label will be set as -1 in the assignment algorithm
- unmatchedTrajectories.insert(i);
+ unmatchedTrajectories.insert(i);
 }
 else
- ;
-
- // filter out matched with low IOU
- matchedPairs.clear();
- for (unsigned int i = 0; i < trkNum; ++i)
 {
- if (assignment[i] == -1) // pass over invalid values
- continue;
- if (cost_matrix[i][assignment[i]] > 1 - _min_iou)
- {
- unmatchedTrajectories.insert(i);
- unmatchedDetections.insert(assignment[i]);
+
+ for (unsigned int i = 0; i < trkNum; i++) // compute cost matrix using 1 - IOU with gating
+ {
+ for (unsigned int j = 0; j < detNum; j++)
+ {
+ double iou = GetIOU(predictedBoxes[i], detections[j].box);
+ double centroid_distance = GetCentroidsDistance(predictedBoxes[i], detections[j].box);
+ if (!detection_matches_track_gate(trackers[i], predictedBoxes[i], detections[j], iou, centroid_distance))
+ {
+ cost_matrix[i][j] = 1e9; // large cost for gating
+ }
+ else
+ {
+ const double scale = std::max(1.0, std::max(box_diagonal(predictedBoxes[i]), box_diagonal(detections[j].box)));
+ const double distance_penalty = std::min(1.0, centroid_distance / scale) * 0.35;
+ cost_matrix[i][j] = 1 - iou + distance_penalty + (1 - detections[j].confidence) * 0.1;
+ }
+ }
+ }
+
+ HungarianAlgorithm HungAlgo;
+ HungAlgo.Solve(cost_matrix, assignment);
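An example cost for a well-aligned pair, with illustrative numbers (IoU 0.80, centroids 5 px apart, boxes roughly 100 px in diagonal, detection confidence 0.95):

// distance_penalty = min(1.0, 5 / 100) * 0.35               = 0.0175
// cost             = (1 - 0.80) + 0.0175 + (1 - 0.95) * 0.1 = 0.2225
// Gated pairs get a flat 1e9 instead, so the Hungarian solver can only
// pick them when a track has no plausible detection at all.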
+
+ // find matches, unmatched_detections and unmatched_predictions
+ if (detNum > trkNum) // there are unmatched detections
+ {
+ for (unsigned int n = 0; n < detNum; n++)
+ allItems.insert(n);
+
+ for (unsigned int i = 0; i < trkNum; ++i)
+ matchedItems.insert(assignment[i]);
+
+ set_difference(allItems.begin(), allItems.end(),
+ matchedItems.begin(), matchedItems.end(),
+ insert_iterator<set<int>>(unmatchedDetections, unmatchedDetections.begin()));
+ }
+ else if (detNum < trkNum) // there are unmatched trajectory/predictions
+ {
+ for (unsigned int i = 0; i < trkNum; ++i)
+ if (assignment[i] == -1) // unassigned label will be set as -1 in the assignment algorithm
+ unmatchedTrajectories.insert(i);
+ }
+ else
+ ;
+
+ // filter out matched with low IOU
+ for (unsigned int i = 0; i < trkNum; ++i)
+ {
+ if (assignment[i] == -1) // pass over invalid values
+ continue;
+ if (cost_matrix[i][assignment[i]] >= 1e8)
+ {
+ unmatchedTrajectories.insert(i);
+ unmatchedDetections.insert(assignment[i]);
+ }
+ else
+ matchedPairs.push_back(cv::Point(i, assignment[i]));
+ }
 }
 for (unsigned int i = 0; i < matchedPairs.size(); i++)
@@ -218,30 +300,17 @@ void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double
 int trkIdx = matchedPairs[i].x;
 int detIdx = matchedPairs[i].y;
 trackers[trkIdx].update(detections[detIdx].box);
- trackers[trkIdx].classId = detections[detIdx].classId;
+ trackers[trkIdx].update_class_scores(detections[detIdx].classScores, detections[detIdx].classId, detections[detIdx].confidence);
 trackers[trkIdx].confidence = detections[detIdx].confidence;
 }
 // create and initialise new trackers for unmatched detections
 for (auto umd : unmatchedDetections)
 {
- KalmanTracker tracker = KalmanTracker(detections[umd].box, detections[umd].confidence, detections[umd].classId, _next_id++);
+ KalmanTracker tracker = KalmanTracker(detections[umd].box, detections[umd].confidence, detections[umd].classId, _next_id++, detections[umd].classScores);
 trackers.push_back(tracker);
 }
- for (auto it2 = dead_trackers_id.begin(); it2 != dead_trackers_id.end(); it2++)
- {
- for (unsigned int i = 0; i < trackers.size();)
- {
- if (trackers[i].m_id == (*it2))
- {
- trackers.erase(trackers.begin() + i);
- continue;
- }
- i++;
- }
- }
-
 // get trackers' output
 frameTrackingResult.clear();
 for (unsigned int i = 0; i < trackers.size();)
@@ -251,7 +320,10 @@ void SortTracker::update(vector<cv::Rect> detections_cv, int frame_count, double
 {
 alive_tracker = true;
 TrackingBox res;
- res.box = trackers[i].get_state();
+ if (trackers[i].m_time_since_update > 0 && i < predictedBoxes.size())
+ res.box = predictedBoxes[i];
+ else
+ res.box = trackers[i].get_state();
 res.id = trackers[i].m_id;
 res.frame = frame_count;
 res.classId = trackers[i].classId;
diff --git a/src/sort_filter/sort.hpp b/src/sort_filter/sort.hpp
index 74e905adc..cc6742b7f 100644
--- a/src/sort_filter/sort.hpp
+++ b/src/sort_filter/sort.hpp
@@ -26,6 +26,7 @@ typedef struct TrackingBox
 int classId = 0;
 int id = 0;
 cv::Rect_<float> box = cv::Rect_<float>(0.0, 0.0, 0.0, 0.0);
+ std::vector<ClassScore> classScores;
 TrackingBox() {}
 TrackingBox(int _frame, float _confidence, int _classId, int _id) : frame(_frame), confidence(_confidence), classId(_classId), id(_id) {}
 } TrackingBox;
@@ -34,11 +35,11 @@ class SortTracker
 {
 public:
 // Constructor
- SortTracker(int max_age = 50, int min_hits = 5, int max_missed = 7, double min_iou = 0.1, double nms_iou_thresh = 0.5, double min_conf = 0.3);
+ SortTracker(int max_age = 50, int min_hits = 5, int max_missed = 3, double min_iou = 0.1, double nms_iou_thresh = 0.5, double min_conf = 0.3);
 // Initialize tracker
 // Update position based on the new frame
- void update(std::vector<cv::Rect> detection, int frame_count, double image_diagonal, std::vector<float> confidences, std::vector<int> classIds);
+ void update(std::vector<cv::Rect> detection, int frame_count, double image_diagonal, std::vector<float> confidences, std::vector<int> classIds, std::vector<std::vector<ClassScore>> classScores = {});
 static double GetIOU(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt);
 double GetCentroidsDistance(cv::Rect_<float> bb_test, cv::Rect_<float> bb_gt);
 std::vector<KalmanTracker> trackers;
diff --git a/tests/CVObjectDetection.cpp b/tests/CVObjectDetection.cpp
index 4d5b4b290..7ecc912dd 100644
--- a/tests/CVObjectDetection.cpp
+++ b/tests/CVObjectDetection.cpp
@@ -25,9 +25,8 @@ using namespace openshot;
 std::string effectInfo =(" {\"protobuf_data_path\": \"objdetector.data\", "
 " \"processing_device\": \"GPU\", "
- " \"model_configuration\": \"~/yolo/yolov3.cfg\", "
- " \"model_weights\": \"~/yolo/yolov3.weights\", "
- " \"classes_file\": \"~/yolo/obj.names\"} ");
+ " \"model\": \"~/yolo/Yolo5/yolov5s.onnx\", "
+ " \"classes_file\": \"~/yolo/Yolo5/obj.names\"} ");
 // Just for the stabilizer constructor, it won't be used
 ProcessingController processingController;
diff --git a/tests/CVTracker.cpp b/tests/CVTracker.cpp
index 9336f74eb..8661b1d1d 100644
--- a/tests/CVTracker.cpp
+++ b/tests/CVTracker.cpp
@@ -13,6 +13,7 @@
 #include
 #include
+#include
 #include "openshot_catch.h"
@@ -20,6 +21,7 @@
 #include "CVTracker.h" // for FrameData, CVTracker
 #include "ProcessingController.h"
 #include "Exceptions.h"
+#include "sort_filter/sort.hpp"
 using namespace openshot;
@@ -181,6 +183,166 @@ TEST_CASE( "Track_FrameSizeChangeDoesNotCrash", "[libopenshot][opencv][tracker]"
 CHECK(fd.y2 <= 1.0f);
 }
+TEST_CASE( "KalmanTracker smooths class scores", "[libopenshot][opencv][tracker]" )
+{
+ KalmanTracker tracker(
+ cv::Rect_<float>(0.0f, 0.0f, 10.0f, 10.0f),
+ 0.9f, 1, 42,
+ { ClassScore(1, 0.9f), ClassScore(2, 0.1f) }
+ );
+
+ CHECK(tracker.classId == 1);
+ CHECK(tracker.confidence == Approx(0.9f));
+
+ tracker.update_class_scores({ ClassScore(1, 0.1f), ClassScore(2, 0.9f) }, 2, 0.9f);
+ CHECK(tracker.classId == 1);
+
+ tracker.update_class_scores({ ClassScore(1, 0.1f), ClassScore(2, 0.9f) }, 2, 0.9f);
+ CHECK(tracker.classId == 1);
+
+ tracker.update_class_scores({ ClassScore(1, 0.1f), ClassScore(2, 0.9f) }, 2, 0.9f);
+ tracker.update_class_scores({ ClassScore(1, 0.1f), ClassScore(2, 0.9f) }, 2, 0.9f);
+ CHECK(tracker.classId == 2);
+}
+
+TEST_CASE( "SortTracker does not reacquire a missed track onto a nearby object", "[libopenshot][opencv][tracker]" )
+{
+ SortTracker sort(50, 1, 7, 0.1, 0.5, 0.0);
+ const double diagonal = std::sqrt(1920.0 * 1920.0 + 1080.0 * 1080.0);
+
+ sort.update(
+ { cv::Rect(100, 100, 60, 60) },
+ 1,
+ diagonal,
+ { 0.95f },
+ { 2 },
+ { { ClassScore(2, 0.95f) } }
+ );
+ sort.update(
+ { cv::Rect(100, 100, 60, 60) },
+ 2,
+ diagonal,
+ { 0.95f },
+ { 2 },
+ { { ClassScore(2, 0.95f) } }
+ );
+ REQUIRE(sort.frameTrackingResult.size() == 1);
+ const int first_id = sort.frameTrackingResult[0].id;
+
+ sort.update({}, 3, diagonal, {}, {}, {});
+ REQUIRE(sort.frameTrackingResult.size() == 1);
+ CHECK(sort.frameTrackingResult[0].id == first_id);
+
+ sort.update(
+ { cv::Rect(100, 145, 60, 60) },
+ 4,
+ diagonal,
+ { 0.95f },
+ { 2 },
+ { { ClassScore(2, 0.95f) } }
+ );
+
+ bool original_track_coasted = false;
+ for (const auto& result : sort.frameTrackingResult) {
+ if (result.id == first_id) {
+ original_track_coasted = true;
+ CHECK(result.box.y < 130.0f);
+ }
+ }
+ CHECK(original_track_coasted);
+ CHECK(sort.trackers.size() >= 2);
+}
+
+TEST_CASE( "SortTracker rejects adjacent-object handoff for active track", "[libopenshot][opencv][tracker]" )
+{
+ SortTracker sort(50, 1, 3, 0.1, 0.5, 0.0);
+ const double diagonal = std::sqrt(960.0 * 960.0 + 540.0 * 540.0);
+
+ sort.update(
+ { cv::Rect(299, 181, 112, 97) },
+ 1,
+ diagonal,
+ { 0.80f },
+ { 2 },
+ { { ClassScore(2, 0.80f) } }
+ );
+ sort.update(
+TEST_CASE( "SortTracker rejects adjacent-object handoff for active track", "[libopenshot][opencv][tracker]" )
+{
+    SortTracker sort(50, 1, 3, 0.1, 0.5, 0.0);
+    const double diagonal = std::sqrt(960.0 * 960.0 + 540.0 * 540.0);
+
+    sort.update(
+        { cv::Rect(299, 181, 112, 97) },
+        1,
+        diagonal,
+        { 0.80f },
+        { 2 },
+        { { ClassScore(2, 0.80f) } }
+    );
+    sort.update(
+        { cv::Rect(299, 181, 112, 97) },
+        2,
+        diagonal,
+        { 0.80f },
+        { 2 },
+        { { ClassScore(2, 0.80f) } }
+    );
+    REQUIRE(sort.frameTrackingResult.size() == 1);
+    const int first_id = sort.frameTrackingResult[0].id;
+
+    sort.update(
+        { cv::Rect(248, 156, 103, 71) },
+        3,
+        diagonal,
+        { 0.77f },
+        { 2 },
+        { { ClassScore(2, 0.77f) } }
+    );
+
+    bool original_track_did_not_jump = false;
+    for (const auto& result : sort.frameTrackingResult) {
+        if (result.id == first_id) {
+            original_track_did_not_jump = true;
+            CHECK(result.box.x > 285.0f);
+            CHECK(result.box.y > 170.0f);
+        }
+    }
+    CHECK(original_track_did_not_jump);
+    CHECK(sort.trackers.size() >= 2);
+}
+
+TEST_CASE( "SortTracker rejects tiny nested detection for vehicle track", "[libopenshot][opencv][tracker]" )
+{
+    SortTracker sort(50, 1, 3, 0.1, 0.5, 0.0);
+    const double diagonal = std::sqrt(960.0 * 960.0 + 540.0 * 540.0);
+
+    sort.update(
+        { cv::Rect(520, 178, 123, 91) },
+        1,
+        diagonal,
+        { 0.77f },
+        { 2 },
+        { { ClassScore(2, 0.77f) } }
+    );
+    sort.update(
+        { cv::Rect(520, 178, 123, 91) },
+        2,
+        diagonal,
+        { 0.77f },
+        { 2 },
+        { { ClassScore(2, 0.77f) } }
+    );
+    REQUIRE(sort.frameTrackingResult.size() == 1);
+    const int car_id = sort.frameTrackingResult[0].id;
+
+    sort.update(
+        { cv::Rect(592, 198, 30, 13) },
+        3,
+        diagonal,
+        { 0.36f },
+        { 0 },
+        { { ClassScore(0, 0.36f), ClassScore(2, 0.15f) } }
+    );
+
+    bool car_track_did_not_shrink = false;
+    for (const auto& result : sort.frameTrackingResult) {
+        if (result.id == car_id) {
+            car_track_did_not_shrink = true;
+            CHECK(result.box.width > 90.0f);
+            CHECK(result.box.height > 70.0f);
+        }
+    }
+    CHECK(car_track_did_not_shrink);
+    CHECK(sort.trackers.size() >= 2);
+}
+
 TEST_CASE( "SaveLoad_Protobuf", "[libopenshot][opencv][tracker]" )
 {
diff --git a/tests/EffectMask.cpp b/tests/EffectMask.cpp
index d099812e7..f9d2e6243 100644
--- a/tests/EffectMask.cpp
+++ b/tests/EffectMask.cpp
@@ -24,10 +24,12 @@
 #include "effects/Blur.h"
 #include "effects/Brightness.h"
 #include "effects/Hue.h"
+#include "effects/ObjectDetection.h"
 #include "effects/Pixelate.h"
 #include "effects/Saturation.h"
 #include "effects/Sharpen.h"
 #include "effects/Tracker.h"
+#include "TrackedObjectBBox.h"
 #include "QtImageReader.h"
 #include "openshot_catch.h"
 
@@ -149,6 +151,116 @@ TEST_CASE("EffectBase common mask blend applies to ProcessFrame", "[effect][mask
     CHECK(out_image->pixelColor(3, 0).red() == 80);
 }
 
+TEST_CASE("ObjectDetection all object update applies sparse style keys", "[effect][object_detection]") {
+    ObjectDetection effect;
+    auto first = std::make_shared<TrackedObjectBBox>(10, 20, 30, 255);
+    auto second = std::make_shared<TrackedObjectBBox>(200, 210, 220, 255);
+    first->AddBox(1, 0.25f, 0.25f, 0.2f, 0.2f, 0.0f);
+    second->AddBox(1, 0.75f, 0.75f, 0.2f, 0.2f, 0.0f);
+    effect.trackedObjects[1] = first;
+    effect.trackedObjects[2] = second;
+
+    Json::Value update;
+    update["objects"]["all"]["background_alpha"] = Keyframe(0.0).JsonValue();
+    update["objects"]["all"]["stroke_width"] = Keyframe(5.0).JsonValue();
+    effect.SetJsonValue(update);
+
+    CHECK(first->background_alpha.GetValue(1) == Approx(0.0));
+    CHECK(second->background_alpha.GetValue(1) == Approx(0.0));
+    CHECK(first->stroke_width.GetValue(1) == Approx(5.0));
+    CHECK(second->stroke_width.GetValue(1) == Approx(5.0));
+
+    CHECK(first->stroke.red.GetValue(1) == Approx(10.0));
+    CHECK(second->stroke.red.GetValue(1) == Approx(200.0));
+    CHECK(first->background.red.GetValue(1) == Approx(10.0));
+    CHECK(second->background.red.GetValue(1) == Approx(200.0));
+}
+TEST_CASE("ObjectDetection individual style overrides all object style", "[effect][object_detection]") {
+    ObjectDetection effect;
+    auto first = std::make_shared<TrackedObjectBBox>(10, 20, 30, 255);
+    auto second = std::make_shared<TrackedObjectBBox>(200, 210, 220, 255);
+    first->Id("effect-1");
+    second->Id("effect-2");
+    first->AddBox(1, 0.25f, 0.25f, 0.2f, 0.2f, 0.0f);
+    second->AddBox(1, 0.75f, 0.75f, 0.2f, 0.2f, 0.0f);
+    effect.trackedObjects[1] = first;
+    effect.trackedObjects[2] = second;
+    effect.selectedObjectIndex = 1;
+
+    Json::Value update;
+    update["objects"]["effect-1"]["stroke"]["red"] = Keyframe(255.0).JsonValue();
+    update["objects"]["effect-1"]["stroke"]["green"] = Keyframe(0.0).JsonValue();
+    update["objects"]["effect-1"]["stroke"]["blue"] = Keyframe(0.0).JsonValue();
+    update["objects"]["all"]["stroke"]["red"] = Keyframe(0.0).JsonValue();
+    update["objects"]["all"]["stroke"]["green"] = Keyframe(255.0).JsonValue();
+    update["objects"]["all"]["stroke"]["blue"] = Keyframe(0.0).JsonValue();
+    effect.SetJsonValue(update);
+
+    CHECK(first->stroke.red.GetValue(1) == Approx(255.0));
+    CHECK(first->stroke.green.GetValue(1) == Approx(0.0));
+    CHECK(first->stroke.blue.GetValue(1) == Approx(0.0));
+    CHECK(second->stroke.red.GetValue(1) == Approx(0.0));
+    CHECK(second->stroke.green.GetValue(1) == Approx(255.0));
+    CHECK(second->stroke.blue.GetValue(1) == Approx(0.0));
+
+    Json::Value props = stringToJson(effect.PropertiesJSON(1));
+    REQUIRE(props["objects"].isMember("effect-1"));
+    CHECK(props["objects"]["effect-1"]["stroke"]["red"]["value"].asDouble() == Approx(255.0));
+    CHECK(props["objects"]["effect-1"]["stroke"]["green"]["value"].asDouble() == Approx(0.0));
+    CHECK(props["objects"]["effect-1"]["stroke"]["blue"]["value"].asDouble() == Approx(0.0));
+}
+
+TEST_CASE("ObjectDetection all object selection exposes tracked object properties", "[effect][object_detection]") {
+    ObjectDetection effect;
+    auto tracked = std::make_shared<TrackedObjectBBox>(10, 20, 30, 255);
+    tracked->AddBox(1, 0.25f, 0.25f, 0.2f, 0.2f, 0.0f);
+    effect.trackedObjects[1] = tracked;
+    effect.selectedObjectIndex = -1;
+
+    Json::Value props = stringToJson(effect.PropertiesJSON(1));
+    REQUIRE(props["objects"].isMember("all"));
+    CHECK(props["selected_object_index"]["min"].asInt() == -1);
+    CHECK(props["objects"]["all"].isMember("background_alpha"));
+    CHECK(props["objects"]["all"].isMember("stroke"));
+    CHECK(props["objects"]["all"].isMember("delta_x"));
+    CHECK(props["objects"]["all"].isMember("scale_x"));
+    CHECK_FALSE(props["objects"]["all"].isMember("x1"));
+    CHECK_FALSE(props.isMember("display_box_text"));
+    CHECK_FALSE(props.isMember("display_boxes"));
+}
REQUIRE(all_props["objects"].isMember("all")); + CHECK(all_props["objects"]["all"]["stroke_width"]["value"].asDouble() == Approx(6.0)); + CHECK(all_props["objects"]["all"]["stroke_alpha"]["value"].asDouble() == Approx(0.25)); + + effect.selectedObjectIndex = 1; + Json::Value first_props = stringToJson(effect.PropertiesJSON(1)); + REQUIRE(first_props["objects"].isMember("effect-1")); + CHECK(first_props["objects"]["effect-1"]["stroke_width"]["value"].asDouble() == Approx(3.0)); + CHECK(first_props["objects"]["effect-1"]["stroke_alpha"]["value"].asDouble() == Approx(0.75)); +} + TEST_CASE("EffectBase mask fields serialize and deserialize", "[effect][mask][json]") { const std::string mask_path = create_mask_png({255, 0}); @@ -196,6 +308,30 @@ TEST_CASE("EffectBase can use tracker effect bbox as generated mask source", "[e CHECK(out_image->pixelColor(3, 0).red() == 80); } +TEST_CASE("EffectBase tracker mask source honors corner radius", "[effect][mask][tracker]") { + Clip parent_clip; + Tracker tracker; + tracker.Id("tracker-source"); + tracker.trackedData->SetBaseFPS(Fraction(30, 1)); + tracker.trackedData->AddBox(1, 0.5f, 0.5f, 1.0f, 1.0f, 0.0f); + tracker.trackedData->background_corner = Keyframe(12.0); + + Brightness brightness(Keyframe(0.8), Keyframe(0.0)); + brightness.MaskSourceId("tracker-source"); + + parent_clip.AddEffect(&tracker); + parent_clip.AddEffect(&brightness); + + auto frame = std::make_shared(1, 20, 20, "#000000"); + frame->GetImage()->fill(QColor(80, 80, 80, 255)); + + auto out = brightness.ProcessFrame(frame, 1); + auto out_image = out->GetImage(); + + CHECK(out_image->pixelColor(0, 0).red() == 80); + CHECK(out_image->pixelColor(10, 10).red() > 80); +} + TEST_CASE("EffectBase tracker mask source with no visible bbox preserves original frame", "[effect][mask][tracker]") { Clip parent_clip; Tracker tracker; diff --git a/tests/KeyFrame.cpp b/tests/KeyFrame.cpp index 04dc2b726..00c1050c0 100644 --- a/tests/KeyFrame.cpp +++ b/tests/KeyFrame.cpp @@ -597,9 +597,9 @@ TEST_CASE( "TrackedObjectBBox init", "[libopenshot][keyframe]" ) CHECK(kfb.stroke.blue.GetInt(1) == 0); CHECK(kfb.stroke.alpha.GetInt(1) == 212); - CHECK(kfb.background.red.GetInt(1) == 0); - CHECK(kfb.background.green.GetInt(1) == 0); - CHECK(kfb.background.blue.GetInt(1) == 255); + CHECK(kfb.background.red.GetInt(1) == 62); + CHECK(kfb.background.green.GetInt(1) == 143); + CHECK(kfb.background.blue.GetInt(1) == 0); CHECK(kfb.background.alpha.GetInt(1) == 212); } From eae93e1ebfee8c02693e25f9710072960724ecdb Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Sun, 10 May 2026 22:26:18 -0500 Subject: [PATCH 03/14] Protecting crash when reading ONNX model with wrong format (Object Detection effect) --- src/CVObjectDetection.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp index 6ed7205aa..f10ae302e 100644 --- a/src/CVObjectDetection.cpp +++ b/src/CVObjectDetection.cpp @@ -98,6 +98,14 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, processingController->SetError(true, error_text); error = true; return; + } catch (const std::exception& e) { + processingController->SetError(true, std::string("Failed to load ONNX model: ") + e.what()); + error = true; + return; + } catch (...) 
{ + processingController->SetError(true, "Failed to load ONNX model: unknown error"); + error = true; + return; } setProcessingDevice(); From 4f289bc4420f05b548a67ec81761c7ab5559513c Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Tue, 12 May 2026 14:22:06 -0500 Subject: [PATCH 04/14] Fix saturation stride and parent effect updates - Respect QImage::bytesPerLine() in saturation and mask blending - Preserve child effect IDs when inheriting parent effect changes - Propagate parent updates correctly through nested child effects - Add regression tests for padded image stride and parent effect inheritance --- src/EffectBase.cpp | 91 ++++++++++++++--------- src/effects/Saturation.cpp | 148 +++++++++++++++++++++---------------- tests/CMakeLists.txt | 1 + tests/Saturation.cpp | 78 +++++++++++++++++++ tests/Timeline.cpp | 65 ++++++++++++++++ 5 files changed, 284 insertions(+), 99 deletions(-) create mode 100644 tests/Saturation.cpp diff --git a/src/EffectBase.cpp b/src/EffectBase.cpp index 1f2cac0b7..2fb9b10b5 100644 --- a/src/EffectBase.cpp +++ b/src/EffectBase.cpp @@ -144,27 +144,22 @@ void EffectBase::SetJson(const std::string value) { // Load Json::Value into this object void EffectBase::SetJsonValue(const Json::Value root) { - - if (ParentTimeline()){ - // Get parent timeline - Timeline* parentTimeline = static_cast(ParentTimeline()); - - // Get the list of effects on the timeline - std::list effects = parentTimeline->ClipEffects(); - - // TODO: Fix recursive call for Object Detection - - // // Loop through the effects and check if we have a child effect linked to this effect - for (auto const& effect : effects){ - // Set the properties of all effects which parentEffect points to this - if ((effect->info.parent_effect_id == this->Id()) && (effect->Id() != this->Id())) - effect->SetJsonValue(root); - } - } + const std::string original_id = this->Id(); + const std::string original_parent_effect_id = this->info.parent_effect_id; // Set this effect properties with the parent effect properties (except the id and parent_effect_id) Json::Value my_root; - if (parentEffect){ + const bool applying_parent_payload = + !original_id.empty() && + !original_parent_effect_id.empty() && + !root["id"].isNull() && + root["id"].asString() == original_parent_effect_id && + root["id"].asString() != original_id; + if (applying_parent_payload) { + my_root = root; + my_root["id"] = original_id; + my_root["parent_effect_id"] = original_parent_effect_id; + } else if (parentEffect){ my_root = parentEffect->JsonValue(); my_root["id"] = this->Id(); my_root["parent_effect_id"] = this->info.parent_effect_id; @@ -224,6 +219,23 @@ void EffectBase::SetJsonValue(const Json::Value root) { else parentEffect = NULL; } + + if (ParentTimeline()){ + // Get parent timeline + Timeline* parentTimeline = static_cast(ParentTimeline()); + + // Get the list of effects on the timeline + std::list effects = parentTimeline->ClipEffects(); + + // TODO: Fix recursive call for Object Detection + + // Loop through the effects and check if we have a child effect linked to this effect + for (auto const& effect : effects){ + // Set the properties of all effects which parentEffect points to this + if ((effect->info.parent_effect_id == this->Id()) && (effect->Id() != this->Id())) + effect->SetJsonValue(my_root); + } + } } // Generate Json::Value for this object @@ -595,25 +607,34 @@ void EffectBase::BlendWithMask(std::shared_ptr original_image, std::shar unsigned char* original_pixels = reinterpret_cast(original_image->bits()); unsigned char* 
@@ -595,25 +607,34 @@ void EffectBase::BlendWithMask(std::shared_ptr<QImage> original_image, std::shared_ptr<QImage> effected_image, std::shared_ptr<QImage> mask_image, bool mask_invert)
 	unsigned char* original_pixels = reinterpret_cast<unsigned char*>(original_image->bits());
 	unsigned char* effected_pixels = reinterpret_cast<unsigned char*>(effected_image->bits());
 	unsigned char* mask_pixels = reinterpret_cast<unsigned char*>(mask_image->bits());
-	const int pixel_count = effected_image->width() * effected_image->height();
+	const int width = effected_image->width();
+	const int height = effected_image->height();
+	const int original_stride = original_image->bytesPerLine();
+	const int effected_stride = effected_image->bytesPerLine();
+	const int mask_stride = mask_image->bytesPerLine();
 
 	#pragma omp parallel for schedule(static)
-	for (int i = 0; i < pixel_count; ++i) {
-		const int idx = i * 4;
-		int gray = qGray(mask_pixels[idx], mask_pixels[idx + 1], mask_pixels[idx + 2]);
-		if (mask_invert)
-			gray = 255 - gray;
-		const float factor = static_cast<float>(gray) / 255.0f;
-		const float inverse = 1.0f - factor;
-
-		effected_pixels[idx] = static_cast<unsigned char>(
-			(original_pixels[idx] * inverse) + (effected_pixels[idx] * factor));
-		effected_pixels[idx + 1] = static_cast<unsigned char>(
-			(original_pixels[idx + 1] * inverse) + (effected_pixels[idx + 1] * factor));
-		effected_pixels[idx + 2] = static_cast<unsigned char>(
-			(original_pixels[idx + 2] * inverse) + (effected_pixels[idx + 2] * factor));
-		effected_pixels[idx + 3] = static_cast<unsigned char>(
-			(original_pixels[idx + 3] * inverse) + (effected_pixels[idx + 3] * factor));
+	for (int y = 0; y < height; ++y) {
+		unsigned char* original_row = original_pixels + y * original_stride;
+		unsigned char* effected_row = effected_pixels + y * effected_stride;
+		unsigned char* mask_row = mask_pixels + y * mask_stride;
+		for (int x = 0; x < width; ++x) {
+			const int idx = x * 4;
+			int gray = qGray(mask_row[idx], mask_row[idx + 1], mask_row[idx + 2]);
+			if (mask_invert)
+				gray = 255 - gray;
+			const float factor = static_cast<float>(gray) / 255.0f;
+			const float inverse = 1.0f - factor;
+
+			effected_row[idx] = static_cast<unsigned char>(
+				(original_row[idx] * inverse) + (effected_row[idx] * factor));
+			effected_row[idx + 1] = static_cast<unsigned char>(
+				(original_row[idx + 1] * inverse) + (effected_row[idx + 1] * factor));
+			effected_row[idx + 2] = static_cast<unsigned char>(
+				(original_row[idx + 2] * inverse) + (effected_row[idx + 2] * factor));
+			effected_row[idx + 3] = static_cast<unsigned char>(
+				(original_row[idx + 3] * inverse) + (effected_row[idx + 3] * factor));
+		}
 	}
 }
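The stride fix above matters because QImage rows may be padded: bytesPerLine() can exceed width() * 4, so indexing pixels as one flat width * height array walks into padding (or past the buffer). A minimal sketch of the row-pointer pattern the patch adopts, assuming a 4-bytes-per-pixel RGBA format:

    #include <QImage>

    // Fill every visible pixel opaque red without touching row padding.
    void fill_red(QImage& img) {  // assumes Format_RGBA8888(_Premultiplied)
        unsigned char* base = img.bits();
        const int stride = img.bytesPerLine();  // may exceed width() * 4
        for (int y = 0; y < img.height(); ++y) {
            unsigned char* row = base + y * stride;
            for (int x = 0; x < img.width(); ++x) {
                row[x * 4 + 0] = 255;  // R
                row[x * 4 + 1] = 0;    // G
                row[x * 4 + 2] = 0;    // B
                row[x * 4 + 3] = 255;  // A
            }
        }
    }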
diff --git a/src/effects/Saturation.cpp b/src/effects/Saturation.cpp
index a10284259..39bd6914a 100644
--- a/src/effects/Saturation.cpp
+++ b/src/effects/Saturation.cpp
@@ -49,44 +49,58 @@ void Saturation::ApplyCustomMaskBlend(std::shared_ptr<QImage> original_image, std::shared_ptr<QImage> effected_image, std::shared_ptr<QImage> mask_image, bool mask_invert)
 	unsigned char* original_pixels = reinterpret_cast<unsigned char*>(original_image->bits());
 	unsigned char* effected_pixels = reinterpret_cast<unsigned char*>(effected_image->bits());
 	unsigned char* mask_pixels = reinterpret_cast<unsigned char*>(mask_image->bits());
-	const int pixel_count = effected_image->width() * effected_image->height();
+	const int width = effected_image->width();
+	const int height = effected_image->height();
+	const int original_stride = original_image->bytesPerLine();
+	const int effected_stride = effected_image->bytesPerLine();
+	const int mask_stride = mask_image->bytesPerLine();
 
 	if (mask_invert) {
 		#pragma omp parallel for schedule(static)
-		for (int i = 0; i < pixel_count; ++i) {
-			const int idx = i * 4;
-			float factor = static_cast<float>(qGray(mask_pixels[idx], mask_pixels[idx + 1], mask_pixels[idx + 2])) / 255.0f;
-			factor = 1.0f - factor;
-			// Use a non-linear response curve for custom saturation drive mode.
-			factor = factor * factor;
-			const float inverse = 1.0f - factor;
-
-			// Drive saturation strength with mask while preserving source alpha.
-			effected_pixels[idx] = static_cast<unsigned char>(
-				(original_pixels[idx] * inverse) + (effected_pixels[idx] * factor));
-			effected_pixels[idx + 1] = static_cast<unsigned char>(
-				(original_pixels[idx + 1] * inverse) + (effected_pixels[idx + 1] * factor));
-			effected_pixels[idx + 2] = static_cast<unsigned char>(
-				(original_pixels[idx + 2] * inverse) + (effected_pixels[idx + 2] * factor));
-			effected_pixels[idx + 3] = original_pixels[idx + 3];
+		for (int y = 0; y < height; ++y) {
+			unsigned char* original_row = original_pixels + y * original_stride;
+			unsigned char* effected_row = effected_pixels + y * effected_stride;
+			unsigned char* mask_row = mask_pixels + y * mask_stride;
+			for (int x = 0; x < width; ++x) {
+				const int idx = x * 4;
+				float factor = static_cast<float>(qGray(mask_row[idx], mask_row[idx + 1], mask_row[idx + 2])) / 255.0f;
+				factor = 1.0f - factor;
+				// Use a non-linear response curve for custom saturation drive mode.
+				factor = factor * factor;
+				const float inverse = 1.0f - factor;
+
+				// Drive saturation strength with mask while preserving source alpha.
+				effected_row[idx] = static_cast<unsigned char>(
+					(original_row[idx] * inverse) + (effected_row[idx] * factor));
+				effected_row[idx + 1] = static_cast<unsigned char>(
+					(original_row[idx + 1] * inverse) + (effected_row[idx + 1] * factor));
+				effected_row[idx + 2] = static_cast<unsigned char>(
+					(original_row[idx + 2] * inverse) + (effected_row[idx + 2] * factor));
+				effected_row[idx + 3] = original_row[idx + 3];
+			}
 		}
 	} else {
 		#pragma omp parallel for schedule(static)
-		for (int i = 0; i < pixel_count; ++i) {
-			const int idx = i * 4;
-			float factor = static_cast<float>(qGray(mask_pixels[idx], mask_pixels[idx + 1], mask_pixels[idx + 2])) / 255.0f;
-			// Use a non-linear response curve for custom saturation drive mode.
-			factor = factor * factor;
-			const float inverse = 1.0f - factor;
-
-			// Drive saturation strength with mask while preserving source alpha.
-			effected_pixels[idx] = static_cast<unsigned char>(
-				(original_pixels[idx] * inverse) + (effected_pixels[idx] * factor));
-			effected_pixels[idx + 1] = static_cast<unsigned char>(
-				(original_pixels[idx + 1] * inverse) + (effected_pixels[idx + 1] * factor));
-			effected_pixels[idx + 2] = static_cast<unsigned char>(
-				(original_pixels[idx + 2] * inverse) + (effected_pixels[idx + 2] * factor));
-			effected_pixels[idx + 3] = original_pixels[idx + 3];
+		for (int y = 0; y < height; ++y) {
+			unsigned char* original_row = original_pixels + y * original_stride;
+			unsigned char* effected_row = effected_pixels + y * effected_stride;
+			unsigned char* mask_row = mask_pixels + y * mask_stride;
+			for (int x = 0; x < width; ++x) {
+				const int idx = x * 4;
+				float factor = static_cast<float>(qGray(mask_row[idx], mask_row[idx + 1], mask_row[idx + 2])) / 255.0f;
+				// Use a non-linear response curve for custom saturation drive mode.
+				factor = factor * factor;
+				const float inverse = 1.0f - factor;
+
+				// Drive saturation strength with mask while preserving source alpha.
+				effected_row[idx] = static_cast<unsigned char>(
+					(original_row[idx] * inverse) + (effected_row[idx] * factor));
+				effected_row[idx + 1] = static_cast<unsigned char>(
+					(original_row[idx + 1] * inverse) + (effected_row[idx + 1] * factor));
+				effected_row[idx + 2] = static_cast<unsigned char>(
+					(original_row[idx + 2] * inverse) + (effected_row[idx + 2] * factor));
+				effected_row[idx + 3] = original_row[idx + 3];
+			}
 		}
 	}
 }
@@ -141,6 +155,9 @@ std::shared_ptr<Frame> Saturation::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
 	unsigned char *pixels = reinterpret_cast<unsigned char*>(frame_image->bits());
+	const int width = frame_image->width();
+	const int height = frame_image->height();
+	const int stride = frame_image->bytesPerLine();
 
 	// LUT for undoing premultiplication without a per-pixel divide.
 	static const std::array<float, 256> inv_alpha = [] {
 		std::array<float, 256> lut{};
@@ -191,39 +208,42 @@ std::shared_ptr<Frame> Saturation::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
 	#pragma omp parallel for if (pixel_count >= 16384) schedule(static) shared (pixels)
-	for (int pixel = 0; pixel < pixel_count; ++pixel)
+	for (int y = 0; y < height; ++y)
 	{
-		const int idx = pixel * 4;
-
-		// Split hot paths by alpha to avoid unnecessary premultiply/unpremultiply work.
-		const int A = pixels[idx + 3];
-		if (A <= 0)
-			continue;
-		int R = 0;
-		int G = 0;
-		int B = 0;
-		if (A == 255) {
-			R = pixels[idx + 0];
-			G = pixels[idx + 1];
-			B = pixels[idx + 2];
-			apply_saturation(R, G, B);
-			pixels[idx + 0] = static_cast<unsigned char>(R);
-			pixels[idx + 1] = static_cast<unsigned char>(G);
-			pixels[idx + 2] = static_cast<unsigned char>(B);
-		} else {
-			const float alpha_percent = static_cast<float>(A) * (1.0f / 255.0f);
-			const float inv_alpha_percent = inv_alpha[A];
-
-			// Get RGB values, and remove pre-multiplied alpha
-			R = static_cast<int>(pixels[idx + 0] * inv_alpha_percent);
-			G = static_cast<int>(pixels[idx + 1] * inv_alpha_percent);
-			B = static_cast<int>(pixels[idx + 2] * inv_alpha_percent);
-			apply_saturation(R, G, B);
-
-			// Pre-multiply alpha back into color channels
-			pixels[idx + 0] = static_cast<unsigned char>(R * alpha_percent);
-			pixels[idx + 1] = static_cast<unsigned char>(G * alpha_percent);
-			pixels[idx + 2] = static_cast<unsigned char>(B * alpha_percent);
+		unsigned char* row = pixels + y * stride;
+		for (int x = 0; x < width; ++x) {
+			const int idx = x * 4;
+
+			// Split hot paths by alpha to avoid unnecessary premultiply/unpremultiply work.
+			const int A = row[idx + 3];
+			if (A <= 0)
+				continue;
+			int R = 0;
+			int G = 0;
+			int B = 0;
+			if (A == 255) {
+				R = row[idx + 0];
+				G = row[idx + 1];
+				B = row[idx + 2];
+				apply_saturation(R, G, B);
+				row[idx + 0] = static_cast<unsigned char>(R);
+				row[idx + 1] = static_cast<unsigned char>(G);
+				row[idx + 2] = static_cast<unsigned char>(B);
+			} else {
+				const float alpha_percent = static_cast<float>(A) * (1.0f / 255.0f);
+				const float inv_alpha_percent = inv_alpha[A];
+
+				// Get RGB values, and remove pre-multiplied alpha
+				R = static_cast<int>(row[idx + 0] * inv_alpha_percent);
+				G = static_cast<int>(row[idx + 1] * inv_alpha_percent);
+				B = static_cast<int>(row[idx + 2] * inv_alpha_percent);
+				apply_saturation(R, G, B);
+
+				// Pre-multiply alpha back into color channels
+				row[idx + 0] = static_cast<unsigned char>(R * alpha_percent);
+				row[idx + 1] = static_cast<unsigned char>(G * alpha_percent);
+				row[idx + 2] = static_cast<unsigned char>(B * alpha_percent);
+			}
 		}
 	}
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 51f5d7958..640e7d20e 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -67,6 +67,7 @@ set(OPENSHOT_TESTS
   BenchmarkArgs
   EffectMask
   Mask
+  Saturation
   Sharpen
   Shadow
   SphericalEffect
"[libopenshot][effect][saturation][omp]") +{ + const int width = 13; + const int height = 1400; + const int padded_stride = (width + 5) * 4; + const unsigned char sentinel = 0xA5; + + std::vector padded_buffer(padded_stride * height, sentinel); + auto padded = std::make_shared( + padded_buffer.data(), width, height, padded_stride, + QImage::Format_RGBA8888_Premultiplied); + fill_rgba_pattern(*padded); + + QImage tight(width, height, QImage::Format_RGBA8888_Premultiplied); + fill_rgba_pattern(tight); + + auto padded_frame = std::make_shared(1, width, height, "#000000", 0, 2); + padded_frame->AddImage(padded); + auto tight_frame = std::make_shared(1, width, height, "#000000", 0, 2); + tight_frame->AddImage(std::make_shared(tight)); + + Saturation padded_effect(Keyframe(0.35), Keyframe(1.1), Keyframe(0.9), Keyframe(1.2)); + Saturation tight_effect(Keyframe(0.35), Keyframe(1.1), Keyframe(0.9), Keyframe(1.2)); + + auto padded_out = padded_effect.GetFrame(padded_frame, 1)->GetImage(); + auto tight_out = tight_effect.GetFrame(tight_frame, 1)->GetImage(); + + REQUIRE(padded_out->bytesPerLine() == padded_stride); + bool active_pixels_match = true; + bool padding_unchanged = true; + for (int y = 0; y < height; ++y) { + const auto* padded_row = padded_out->constScanLine(y); + const auto* tight_row = tight_out->constScanLine(y); + for (int x = 0; x < width * 4; ++x) { + active_pixels_match = active_pixels_match && padded_row[x] == tight_row[x]; + } + for (int x = width * 4; x < padded_stride; ++x) { + padding_unchanged = padding_unchanged && padded_row[x] == sentinel; + } + } + CHECK(active_pixels_match); + CHECK(padding_unchanged); +} diff --git a/tests/Timeline.cpp b/tests/Timeline.cpp index 0c3b2a88e..d4e0db7c6 100644 --- a/tests/Timeline.cpp +++ b/tests/Timeline.cpp @@ -33,6 +33,7 @@ #include "effects/Bars.h" #include "effects/Mask.h" #include "effects/Negate.h" +#include "effects/Saturation.h" using namespace openshot; @@ -956,6 +957,70 @@ TEST_CASE( "GetClipEffect by id", "[libopenshot][timeline]" ) CHECK(match1->Layer() == 2); } +TEST_CASE( "Parent effect update preserves child effect id", "[libopenshot][timeline]" ) +{ + Timeline t(640, 480, Fraction(30, 1), 44100, 2, LAYOUT_STEREO); + + std::stringstream path1; + path1 << TEST_MEDIA_PATH << "interlaced.png"; + auto media_path1 = path1.str(); + + Clip parent_clip(media_path1); + parent_clip.Id("CLIP-PARENT"); + + Clip child_clip(media_path1); + child_clip.Id("CLIP-CHILD"); + + Clip grandchild_clip(media_path1); + grandchild_clip.Id("CLIP-GRANDCHILD"); + + t.AddClip(&parent_clip); + t.AddClip(&child_clip); + t.AddClip(&grandchild_clip); + + Saturation parent_effect; + parent_effect.Id("EFFECT-PARENT"); + parent_clip.AddEffect(&parent_effect); + + Saturation child_effect; + child_effect.Id("EFFECT-CHILD"); + child_clip.AddEffect(&child_effect); + + Saturation grandchild_effect; + grandchild_effect.Id("EFFECT-GRANDCHILD"); + grandchild_clip.AddEffect(&grandchild_effect); + + Json::Value child_json = child_effect.JsonValue(); + child_json["parent_effect_id"] = parent_effect.Id(); + child_effect.SetJsonValue(child_json); + REQUIRE(t.GetClipEffect("EFFECT-CHILD") != nullptr); + + Json::Value grandchild_json = grandchild_effect.JsonValue(); + grandchild_json["parent_effect_id"] = child_effect.Id(); + grandchild_effect.SetJsonValue(grandchild_json); + REQUIRE(t.GetClipEffect("EFFECT-GRANDCHILD") != nullptr); + + Json::Value parent_json = parent_effect.JsonValue(); + parent_json["order"] = 7; + parent_json["saturation"] = Keyframe(2.25).JsonValue(); + 
parent_json["saturation_R"] = Keyframe(0.5).JsonValue(); + parent_effect.SetJsonValue(parent_json); + + REQUIRE(t.GetClipEffect("EFFECT-PARENT") != nullptr); + REQUIRE(t.GetClipEffect("EFFECT-CHILD") != nullptr); + REQUIRE(t.GetClipEffect("EFFECT-GRANDCHILD") != nullptr); + CHECK(t.GetClipEffect("EFFECT-CHILD")->Id() == "EFFECT-CHILD"); + CHECK(t.GetClipEffect("EFFECT-GRANDCHILD")->Id() == "EFFECT-GRANDCHILD"); + CHECK(child_effect.Order() == 7); + CHECK(grandchild_effect.Order() == 7); + CHECK(child_effect.saturation.GetValue(1) == Approx(2.25)); + CHECK(child_effect.saturation_R.GetValue(1) == Approx(0.5)); + CHECK(grandchild_effect.saturation.GetValue(1) == Approx(2.25)); + CHECK(grandchild_effect.saturation_R.GetValue(1) == Approx(0.5)); + CHECK(openshot::stringToJson(child_effect.PropertiesJSON(1))["parent_effect_id"]["memo"].asString() == "EFFECT-PARENT"); + CHECK(openshot::stringToJson(grandchild_effect.PropertiesJSON(1))["parent_effect_id"]["memo"].asString() == "EFFECT-CHILD"); +} + TEST_CASE( "GetEffect by id", "[libopenshot][timeline]" ) { Timeline t(640, 480, Fraction(30, 1), 44100, 2, LAYOUT_STEREO); From 14c63f3714905f6a0c717b101f3bf1a770adee23 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Tue, 12 May 2026 15:15:15 -0500 Subject: [PATCH 05/14] Fix margin-bounded effects to avoid invalid image access - Clamp and validate Blur, Bars, and Pixelate margin-derived regions - Add regression coverage for collapsed/out-of-range margin cases - Verify focused Bars, Blur, and Pixelate tests pass --- src/effects/Bars.cpp | 36 +++++++++++++------ src/effects/Blur.cpp | 76 ++++++++++++---------------------------- src/effects/Pixelate.cpp | 24 ++++++++++++- tests/Bars.cpp | 41 ++++++++++++++++++++++ tests/Blur.cpp | 19 ++++++++++ tests/CMakeLists.txt | 2 ++ tests/Pixelate.cpp | 39 +++++++++++++++++++++ 7 files changed, 172 insertions(+), 65 deletions(-) create mode 100644 tests/Bars.cpp create mode 100644 tests/Pixelate.cpp diff --git a/src/effects/Bars.cpp b/src/effects/Bars.cpp index 00f5a8ce4..fec49da79 100644 --- a/src/effects/Bars.cpp +++ b/src/effects/Bars.cpp @@ -13,8 +13,20 @@ #include "Bars.h" #include "Exceptions.h" +#include + using namespace openshot; +namespace { +double clamp_margin(double value) { + if (value < 0.0) + return 0.0; + if (value > 1.0) + return 1.0; + return value; +} +} + /// Blank constructor, useful when using Json to load the effect properties Bars::Bars() : color("#000000"), left(0.0), top(0.1), right(0.0), bottom(0.1) { // Init effect properties @@ -49,10 +61,14 @@ std::shared_ptr Bars::GetFrame(std::shared_ptr { // Get the frame's image std::shared_ptr frame_image = frame->GetImage(); + const int width = frame_image->width(); + const int height = frame_image->height(); + if (width <= 0 || height <= 0) + return frame; // Get bar color (and create small color image) auto tempColor = std::make_shared( - frame_image->width(), 1, QImage::Format_RGBA8888_Premultiplied); + width, 1, QImage::Format_RGBA8888_Premultiplied); tempColor->fill(QColor(QString::fromStdString(color.GetColorHex(frame_number)))); // Get current keyframe values @@ -66,26 +82,26 @@ std::shared_ptr Bars::GetFrame(std::shared_ptr unsigned char *color_pixels = (unsigned char *) tempColor->bits(); // Get pixels sizes of all bars - int top_bar_height = top_value * frame_image->height(); - int bottom_bar_height = bottom_value * frame_image->height(); - int left_bar_width = left_value * frame_image->width(); - int right_bar_width = right_value * frame_image->width(); + int top_bar_height = 
+
 /// Blank constructor, useful when using Json to load the effect properties
 Bars::Bars() : color("#000000"), left(0.0), top(0.1), right(0.0), bottom(0.1) {
 	// Init effect properties
@@ -49,10 +61,14 @@ std::shared_ptr<Frame> Bars::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
 {
 	// Get the frame's image
 	std::shared_ptr<QImage> frame_image = frame->GetImage();
+	const int width = frame_image->width();
+	const int height = frame_image->height();
+	if (width <= 0 || height <= 0)
+		return frame;
 
 	// Get bar color (and create small color image)
 	auto tempColor = std::make_shared<QImage>(
-		frame_image->width(), 1, QImage::Format_RGBA8888_Premultiplied);
+		width, 1, QImage::Format_RGBA8888_Premultiplied);
 	tempColor->fill(QColor(QString::fromStdString(color.GetColorHex(frame_number))));
 
 	// Get current keyframe values
@@ -66,26 +82,26 @@ std::shared_ptr<Frame> Bars::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number)
 	unsigned char *color_pixels = (unsigned char *) tempColor->bits();
 
 	// Get pixels sizes of all bars
-	int top_bar_height = top_value * frame_image->height();
-	int bottom_bar_height = bottom_value * frame_image->height();
-	int left_bar_width = left_value * frame_image->width();
-	int right_bar_width = right_value * frame_image->width();
+	int top_bar_height = clamp_margin(top_value) * height;
+	int bottom_bar_height = clamp_margin(bottom_value) * height;
+	int left_bar_width = clamp_margin(left_value) * width;
+	int right_bar_width = clamp_margin(right_value) * width;
 
 	// Loop through rows
-	for (int row = 0; row < frame_image->height(); row++) {
+	for (int row = 0; row < height; row++) {
 
 		// Top & Bottom Bar
-		if ((top_bar_height > 0.0 && row <= top_bar_height) || (bottom_bar_height > 0.0 && row >= frame_image->height() - bottom_bar_height)) {
-			memcpy(&pixels[row * frame_image->width() * 4], color_pixels, sizeof(char) * frame_image->width() * 4);
+		if ((top_bar_height > 0.0 && row <= top_bar_height) || (bottom_bar_height > 0.0 && row >= height - bottom_bar_height)) {
+			memcpy(&pixels[row * width * 4], color_pixels, sizeof(char) * width * 4);
 		} else {
 			// Left Bar
 			if (left_bar_width > 0.0) {
-				memcpy(&pixels[row * frame_image->width() * 4], color_pixels, sizeof(char) * left_bar_width * 4);
+				memcpy(&pixels[row * width * 4], color_pixels, sizeof(char) * left_bar_width * 4);
 			}
 
 			// Right Bar
 			if (right_bar_width > 0.0) {
-				memcpy(&pixels[((row * frame_image->width()) + (frame_image->width() - right_bar_width)) * 4], color_pixels, sizeof(char) * right_bar_width * 4);
+				memcpy(&pixels[((row * width) + (width - right_bar_width)) * 4], color_pixels, sizeof(char) * right_bar_width * 4);
 			}
 		}
 	}
diff --git a/src/effects/Blur.cpp b/src/effects/Blur.cpp
index 65c53f561..43f3ce8b3 100644
--- a/src/effects/Blur.cpp
+++ b/src/effects/Blur.cpp
@@ -201,42 +201,26 @@ void Blur::boxBlurH(unsigned char *scl, unsigned char *tcl, int w, int h, int r)
 		const unsigned char* src = scl + i * w * 4;
 		unsigned char* dst = tcl + i * w * 4;
 
-		const unsigned char* first = src;
-		const unsigned char* last = src + (w - 1) * 4;
-
-		int val[4];
-		for (int c = 0; c < 4; ++c)
-			val[c] = (r + 1) * first[c];
-		for (int j = 0; j < r; ++j) {
-			const unsigned char* p = src + j * 4;
+		int val[4] = {0, 0, 0, 0};
+		for (int j = -r; j <= r; ++j) {
+			const int sample_x = std::min(std::max(j, 0), w - 1);
+			const unsigned char* p = src + sample_x * 4;
 			for (int c = 0; c < 4; ++c)
 				val[c] += p[c];
 		}
 
-		int li = 0, ri = r;
-		for (int j = 0; j <= r; ++j, ++ri) {
-			const unsigned char* add = src + ri * 4;
+		for (int j = 0; j < w; ++j) {
 			unsigned char* out = dst + j * 4;
 			for (int c = 0; c < 4; ++c) {
-				val[c] += add[c] - first[c];
 				out[c] = (unsigned char)(val[c] * iarr + 0.5f);
 			}
-		}
-		for (int j = r + 1; j < w - r; ++j, ++li, ++ri) {
-			const unsigned char* add = src + ri * 4;
-			const unsigned char* sub = src + li * 4;
-			unsigned char* out = dst + j * 4;
-			for (int c = 0; c < 4; ++c) {
-				val[c] += add[c] - sub[c];
-				out[c] = (unsigned char)(val[c] * iarr + 0.5f);
-			}
-		}
-		for (int j = w - r; j < w; ++j, ++li) {
-			const unsigned char* sub = src + li * 4;
-			unsigned char* out = dst + j * 4;
+
+			const int remove_x = std::min(std::max(j - r, 0), w - 1);
+			const int add_x = std::min(std::max(j + r + 1, 0), w - 1);
+			const unsigned char* remove = src + remove_x * 4;
+			const unsigned char* add = src + add_x * 4;
 			for (int c = 0; c < 4; ++c) {
-				val[c] += last[c] - sub[c];
-				out[c] = (unsigned char)(val[c] * iarr + 0.5f);
+				val[c] += add[c] - remove[c];
 			}
 		}
 	}
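The rewritten loops above replace the three-phase (lead-in, steady-state, tail) box blur with a single sliding window whose samples are clamped to the row edges, which stays correct even when the blur region is narrower than the radius. A 1-D sketch of the same idea, not taken from Blur.cpp:

    #include <algorithm>
    #include <vector>

    // Box blur of radius r over one channel, clamping samples at the edges.
    std::vector<float> box_blur_1d(const std::vector<float>& src, int r) {
        const int n = static_cast<int>(src.size());
        std::vector<float> dst(n);
        if (n == 0) return dst;
        const float iarr = 1.0f / (2 * r + 1);
        float val = 0.0f;
        // Prime the window centered on index 0.
        for (int j = -r; j <= r; ++j)
            val += src[std::min(std::max(j, 0), n - 1)];
        for (int j = 0; j < n; ++j) {
            dst[j] = val * iarr;
            // Slide: drop the left-most sample, add the next one on the right.
            const int remove = std::min(std::max(j - r, 0), n - 1);
            const int add = std::min(std::max(j + r + 1, 0), n - 1);
            val += src[add] - src[remove];
        }
        return dst;
    }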
for (int j = 0; j < r; ++j) { - const unsigned char* p = col_src + j * stride; + int val[4] = {0, 0, 0, 0}; + for (int j = -r; j <= r; ++j) { + const int sample_y = std::min(std::max(j, 0), h - 1); + const unsigned char* p = col_src + sample_y * stride; for (int c = 0; c < 4; ++c) val[c] += p[c]; } - int li = 0, ri = r; - for (int j = 0; j <= r; ++j, ++ri) { - const unsigned char* add = col_src + ri * stride; + for (int j = 0; j < h; ++j) { unsigned char* out = col_dst + j * stride; for (int c = 0; c < 4; ++c) { - val[c] += add[c] - first[c]; out[c] = (unsigned char)(val[c] * iarr + 0.5f); } - } - for (int j = r + 1; j < h - r; ++j, ++li, ++ri) { - const unsigned char* add = col_src + ri * stride; - const unsigned char* sub = col_src + li * stride; - unsigned char* out = col_dst + j * stride; - for (int c = 0; c < 4; ++c) { - val[c] += add[c] - sub[c]; - out[c] = (unsigned char)(val[c] * iarr + 0.5f); - } - } - for (int j = h - r; j < h; ++j, ++li) { - const unsigned char* sub = col_src + li * stride; - unsigned char* out = col_dst + j * stride; + + const int remove_y = std::min(std::max(j - r, 0), h - 1); + const int add_y = std::min(std::max(j + r + 1, 0), h - 1); + const unsigned char* remove = col_src + remove_y * stride; + const unsigned char* add = col_src + add_y * stride; for (int c = 0; c < 4; ++c) { - val[c] += last[c] - sub[c]; - out[c] = (unsigned char)(val[c] * iarr + 0.5f); + val[c] += add[c] - remove[c]; } } } diff --git a/src/effects/Pixelate.cpp b/src/effects/Pixelate.cpp index f145ccd97..ac0fe771c 100644 --- a/src/effects/Pixelate.cpp +++ b/src/effects/Pixelate.cpp @@ -19,8 +19,20 @@ #include #include +#include + using namespace openshot; +namespace { +double clamp_margin(double value) { + if (value < 0.0) + return 0.0; + if (value > 1.0) + return 1.0; + return value; +} +} + /// Blank constructor, useful when using Json to load the effect properties Pixelate::Pixelate() : pixelization(0.5), left(0.0), top(0.0), right(0.0), bottom(0.0), mask_mode(PIXELATE_MASK_LIMIT_TO_AREA) { @@ -69,10 +81,20 @@ Pixelate::GetFrame(std::shared_ptr frame, int64_t frame_number) if (pixelization_value > 0.0) { int w = frame_image->width(); int h = frame_image->height(); + if (w <= 0 || h <= 0) + return frame; // Define area we're working on in terms of a QRect with QMargins applied QRect area(QPoint(0,0), frame_image->size()); - area = area.marginsRemoved({int(left_value * w), int(top_value * h), int(right_value * w), int(bottom_value * h)}); + area = area.marginsRemoved({ + int(clamp_margin(left_value) * w), + int(clamp_margin(top_value) * h), + int(clamp_margin(right_value) * w), + int(clamp_margin(bottom_value) * h) + }); + area = area.intersected(QRect(QPoint(0,0), frame_image->size())); + if (area.isEmpty()) + return frame; int scale_to = (int) (area.width() * pixelization_value); if (scale_to < 1) { diff --git a/tests/Bars.cpp b/tests/Bars.cpp new file mode 100644 index 000000000..7e2d811e6 --- /dev/null +++ b/tests/Bars.cpp @@ -0,0 +1,41 @@ +/** + * @file + * @brief Unit tests for Bars effect + * + * @ref License + */ + +// Copyright (c) 2008-2026 OpenShot Studios, LLC +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "openshot_catch.h" + +#include "Frame.h" +#include "effects/Bars.h" + +#include +#include + +using namespace openshot; + +TEST_CASE("Bars margins clamp out-of-range values", "[effect][bars]") { + auto frame = std::make_shared(1, 8, 8, "#336699"); + auto image = frame->GetImage(); + + Bars effect(Color("#000000"), Keyframe(2.0), Keyframe(-1.0), Keyframe(2.0), 
diff --git a/tests/Bars.cpp b/tests/Bars.cpp
new file mode 100644
index 000000000..7e2d811e6
--- /dev/null
+++ b/tests/Bars.cpp
@@ -0,0 +1,41 @@
+/**
+ * @file
+ * @brief Unit tests for Bars effect
+ *
+ * @ref License
+ */
+
+// Copyright (c) 2008-2026 OpenShot Studios, LLC
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "openshot_catch.h"
+
+#include "Frame.h"
+#include "effects/Bars.h"
+
+#include <QColor>
+#include <QSize>
+
+using namespace openshot;
+
+TEST_CASE("Bars margins clamp out-of-range values", "[effect][bars]") {
+    auto frame = std::make_shared<Frame>(1, 8, 8, "#336699");
+    auto image = frame->GetImage();
+
+    Bars effect(Color("#000000"), Keyframe(2.0), Keyframe(-1.0), Keyframe(2.0), Keyframe(-1.0));
+
+    REQUIRE_NOTHROW(effect.GetFrame(frame, 1));
+    CHECK(image->size() == QSize(8, 8));
+}
+
+TEST_CASE("Bars margins handle full-frame bars", "[effect][bars]") {
+    auto frame = std::make_shared<Frame>(1, 8, 8, "#336699");
+    auto image = frame->GetImage();
+
+    Bars effect(Color("#000000"), Keyframe(0.0), Keyframe(1.0), Keyframe(0.0), Keyframe(1.0));
+
+    REQUIRE_NOTHROW(effect.GetFrame(frame, 1));
+    CHECK(image->pixelColor(0, 0) == QColor(0, 0, 0, 255));
+    CHECK(image->pixelColor(7, 7) == QColor(0, 0, 0, 255));
+}
diff --git a/tests/Blur.cpp b/tests/Blur.cpp
index 3edf708c5..bbe4d0460 100644
--- a/tests/Blur.cpp
+++ b/tests/Blur.cpp
@@ -40,6 +40,25 @@ TEST_CASE("Blur margins limit affected area", "[effect][blur]") {
     CHECK(out_image->pixelColor(5, 0).red() > 0);
 }
 
+TEST_CASE("Blur margins handle small affected area", "[effect][blur]") {
+    auto frame = std::make_shared<Frame>(1, 8, 8, "#000000");
+    auto image = frame->GetImage();
+
+    for (int y = 0; y < image->height(); ++y) {
+        for (int x = 0; x < image->width(); ++x) {
+            image->setPixelColor(x, y, ((x + y) % 2 == 0)
+                ? QColor(255, 255, 255, 255)
+                : QColor(0, 0, 0, 255));
+        }
+    }
+
+    Blur effect(Keyframe(6.0), Keyframe(6.0), Keyframe(3.0), Keyframe(1.0),
+                Keyframe(0.25), Keyframe(0.25), Keyframe(0.25), Keyframe(0.25));
+
+    REQUIRE_NOTHROW(effect.GetFrame(frame, 1));
+    CHECK(image->size() == QSize(8, 8));
+}
+
 TEST_CASE("Blur margin properties serialize", "[effect][blur][json]") {
     Blur effect(Keyframe(6.0), Keyframe(6.0), Keyframe(3.0), Keyframe(3.0),
                 Keyframe(0.1), Keyframe(0.2), Keyframe(0.3), Keyframe(0.4));
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 640e7d20e..0167c9fab 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -53,6 +53,7 @@ set(OPENSHOT_TESTS
   VideoCacheThread
   # Effects
   AudioVisualization
+  Bars
   BeatSync
   Blur
   ColorGrade
@@ -67,6 +68,7 @@ set(OPENSHOT_TESTS
   BenchmarkArgs
   EffectMask
   Mask
+  Pixelate
   Saturation
   Sharpen
   Shadow
diff --git a/tests/Pixelate.cpp b/tests/Pixelate.cpp
new file mode 100644
index 000000000..daf75f4ca
--- /dev/null
+++ b/tests/Pixelate.cpp
@@ -0,0 +1,39 @@
+/**
+ * @file
+ * @brief Unit tests for Pixelate effect
+ *
+ * @ref License
+ */
+
+// Copyright (c) 2008-2026 OpenShot Studios, LLC
+//
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+#include "openshot_catch.h"
+
+#include "Frame.h"
+#include "effects/Pixelate.h"
+
+#include <QSize>
+
+using namespace openshot;
+
+TEST_CASE("Pixelate margins handle collapsed area", "[effect][pixelate]") {
+    auto frame = std::make_shared<Frame>(1, 8, 8, "#336699");
+    auto image = frame->GetImage();
+
+    Pixelate effect(Keyframe(0.5), Keyframe(0.75), Keyframe(0.0), Keyframe(0.75), Keyframe(0.0));
+
+    REQUIRE_NOTHROW(effect.GetFrame(frame, 1));
+    CHECK(image->size() == QSize(8, 8));
+}
+
+TEST_CASE("Pixelate margins clamp out-of-range values", "[effect][pixelate]") {
+    auto frame = std::make_shared<Frame>(1, 8, 8, "#336699");
+    auto image = frame->GetImage();
+
+    Pixelate effect(Keyframe(0.5), Keyframe(-1.0), Keyframe(-1.0), Keyframe(2.0), Keyframe(2.0));
+
+    REQUIRE_NOTHROW(effect.GetFrame(frame, 1));
+    CHECK(image->size() == QSize(8, 8));
+}

From 4f6cb7c50f7754503c0fa9ca087373c47925ed6c Mon Sep 17 00:00:00 2001
From: Jonathan Thomas
Date: Wed, 13 May 2026 00:00:29 -0500
Subject: [PATCH 06/14] Updating the GitLab Linux builder to use the new
 OpenCV 4.5.5 install under /opt, and returning an error message on OpenCV
 versions older than 4.3.0 for the Object Detection effect.
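The version gate this commit introduces boils down to a preprocessor check against OpenCV's version macros. A minimal sketch of the same check in isolation (the function name is illustrative; the macros come from opencv2/core/version.hpp):

    #include <opencv2/core/version.hpp>
    #include <string>

    // Empty string means this OpenCV build is new enough for YOLOv5.
    std::string opencv_too_old_error() {
    #if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3)
        return std::string("YOLOv5 requires OpenCV 4.3.0 or newer; this build is ")
               + CV_VERSION + ".";
    #else
        return "";
    #endif
    }

Failing with a readable message here is preferable to letting cv::dnn::readNetFromONNX throw on an unsupported model.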
---
 .gitlab-ci.yml            | 10 +++++++++-
 src/CVObjectDetection.cpp |  8 ++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5f461d9a5..f0457e39c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -22,6 +22,9 @@ init-builders:
 
 linux-builder:
   stage: build-libopenshot
+  variables:
+    OPENCV_ROOT: /opt/opencv-4.5.5
+    OpenCV_DIR: /opt/opencv-4.5.5/lib/cmake/opencv4
   artifacts:
     expire_in: 6 months
     paths:
@@ -33,8 +36,13 @@ linux-builder:
     - fi
     - unzip artifacts.zip
    - export LIBOPENSHOT_AUDIO_DIR=$CI_PROJECT_DIR/build/install-x64
+    - export CMAKE_PREFIX_PATH="$OPENCV_ROOT:$CMAKE_PREFIX_PATH"
+    - export PKG_CONFIG_PATH="$OPENCV_ROOT/lib/pkgconfig:$PKG_CONFIG_PATH"
+    - export LD_LIBRARY_PATH="$OPENCV_ROOT/lib:$LD_LIBRARY_PATH"
+    - export PATH="$OPENCV_ROOT/bin:$PATH"
+    - opencv_version
     - mkdir -p build; cd build;
-    - cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -DCMAKE_BUILD_TYPE:STRING=Release -DAPPIMAGE_BUILD=1 -DUSE_SYSTEM_JSONCPP=0 ../
+    - cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"OpenCV_DIR:PATH=$OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -DCMAKE_BUILD_TYPE:STRING=Release -DAPPIMAGE_BUILD=1 -DUSE_SYSTEM_JSONCPP=0 ../
     - make -j 4
     - make install
     - ctest --output-on-failure -VV
diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp
index f10ae302e..8e11310b3 100644
--- a/src/CVObjectDetection.cpp
+++ b/src/CVObjectDetection.cpp
@@ -86,6 +86,14 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start,
     std::string line;
     while (std::getline(classes_file, line)) classNames.push_back(line);
 
+#if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3)
+    processingController->SetError(true,
+        std::string("Failed to load ONNX model: YOLOv5 requires OpenCV 4.3.0 or newer. "
+                    "This OpenCV build is ") + CV_VERSION + ".");
+    error = true;
+    return;
+#endif
+
     // Load the network
     try {
         net = cv::dnn::readNetFromONNX(modelPath);

From 47d1784b1c7fd6f4955f38aaecfcf7bc2f531c4b Mon Sep 17 00:00:00 2001
From: Jonathan Thomas
Date: Wed, 13 May 2026 14:30:40 -0500
Subject: [PATCH 07/14] Validate YOLO ONNX models in libopenshot

- Add libopenshot-side ONNX model validation for object detection
- Return user-displayable error text instead of throwing through callers
- Share ONNX load/error handling between validation and object detection
  processing
- Fail early for YOLOv5 on OpenCV older than 4.3
- Expose validation through ClipProcessingJobs for Python bindings
---
 src/CVObjectDetection.cpp  | 62 +++++++++++++++++++++++---------------
 src/CVObjectDetection.h    |  2 ++
 src/ClipProcessingJobs.cpp |  8 +++++
 src/ClipProcessingJobs.h   |  1 +
 4 files changed, 49 insertions(+), 24 deletions(-)
" + "This OpenCV build is ") + CV_VERSION + "."; +#else + try { + cv::dnn::Net loaded_net = cv::dnn::readNetFromONNX(modelPath); + if (net) { + *net = loaded_net; + } + return ""; + } catch (const cv::Exception& e) { + std::string error_text = std::string("Failed to load ONNX model: ") + e.what(); + if (error_text.find("Unsupported data type: FLOAT16") != std::string::npos) { + error_text = "Failed to load ONNX model: FLOAT16 is not supported by this OpenCV build. " + "Please use an FP32 ONNX model."; + } + return error_text; + } catch (const std::exception& e) { + return std::string("Failed to load ONNX model: ") + e.what(); + } catch (...) { + return "Failed to load ONNX model: unknown error"; + } +#endif +} + +} + CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController) : processingController(&processingController), processingDevice("CPU"), inpWidth(640), inpHeight(640){ confThreshold = 0.25; @@ -33,6 +64,11 @@ CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingCont SetJson(processInfoJson); } +std::string CVObjectDetection::ValidateONNXModel(std::string modelPath) +{ + return LoadONNXModel(modelPath, nullptr); +} + void CVObjectDetection::setProcessingDevice(){ if(processingDevice == "GPU"){ net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); @@ -86,34 +122,12 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, std::string line; while (std::getline(classes_file, line)) classNames.push_back(line); -#if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3) - processingController->SetError(true, - std::string("Failed to load ONNX model: YOLOv5 requires OpenCV 4.3.0 or newer. " - "This OpenCV build is ") + CV_VERSION + "."); - error = true; - return; -#endif - // Load the network - try { - net = cv::dnn::readNetFromONNX(modelPath); - } catch (const cv::Exception& e) { - std::string error_text = std::string("Failed to load model: ") + e.what(); - if (error_text.find("Unsupported data type: FLOAT16") != std::string::npos) { - error_text = "Failed to load ONNX model: FLOAT16 is not supported by this OpenCV build. " - "Please use an FP32 ONNX model."; - } + std::string error_text = LoadONNXModel(modelPath, &net); + if (!error_text.empty()) { processingController->SetError(true, error_text); error = true; return; - } catch (const std::exception& e) { - processingController->SetError(true, std::string("Failed to load ONNX model: ") + e.what()); - error = true; - return; - } catch (...) 
{ - processingController->SetError(true, "Failed to load ONNX model: unknown error"); - error = true; - return; } setProcessingDevice(); diff --git a/src/CVObjectDetection.h b/src/CVObjectDetection.h index 481e107ab..d993817ee 100644 --- a/src/CVObjectDetection.h +++ b/src/CVObjectDetection.h @@ -107,6 +107,8 @@ namespace openshot CVObjectDetection(std::string processInfoJson, ProcessingController &processingController); + static std::string ValidateONNXModel(std::string modelPath); + // Iterate over a clip object and run inference for each video frame void detectObjectsClip(openshot::Clip &video, size_t start=0, size_t end=0, bool process_interval=false); diff --git a/src/ClipProcessingJobs.cpp b/src/ClipProcessingJobs.cpp index c90e1a153..19a237ab1 100644 --- a/src/ClipProcessingJobs.cpp +++ b/src/ClipProcessingJobs.cpp @@ -20,6 +20,14 @@ ClipProcessingJobs::ClipProcessingJobs(std::string processingType, std::string p processingType(processingType), processInfoJson(processInfoJson){ } +std::string ClipProcessingJobs::ValidateONNXModel(std::string modelPath){ +#ifdef USE_OPENCV + return CVObjectDetection::ValidateONNXModel(modelPath); +#else + return "OpenShot was not compiled with OpenCV support."; +#endif +} + void ClipProcessingJobs::processClip(Clip& clip, std::string json){ processInfoJson = json; diff --git a/src/ClipProcessingJobs.h b/src/ClipProcessingJobs.h index 304433cbd..dff27b265 100644 --- a/src/ClipProcessingJobs.h +++ b/src/ClipProcessingJobs.h @@ -56,6 +56,7 @@ class ClipProcessingJobs{ public: // Constructor ClipProcessingJobs(std::string processingType, std::string processInfoJson); + static std::string ValidateONNXModel(std::string modelPath); // Process clip accordingly to processingType void processClip(Clip& clip, std::string json); From c9380a8c0ee6e0c74a24d9878a54ad195744f99c Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Wed, 13 May 2026 22:48:08 -0500 Subject: [PATCH 08/14] Updating OpenCV on Linux and Windows builders to use a newly installed 4.13.0 version (built from source) --- .gitlab-ci.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f0457e39c..6d85ac65e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,8 +23,8 @@ init-builders: linux-builder: stage: build-libopenshot variables: - OPENCV_ROOT: /opt/opencv-4.5.5 - OpenCV_DIR: /opt/opencv-4.5.5/lib/cmake/opencv4 + OPENCV_ROOT: /opt/opencv-4.13.0 + OpenCV_DIR: /opt/opencv-4.13.0/lib/cmake/opencv4 artifacts: expire_in: 6 months paths: @@ -40,7 +40,6 @@ linux-builder: - export PKG_CONFIG_PATH="$OPENCV_ROOT/lib/pkgconfig:$PKG_CONFIG_PATH" - export LD_LIBRARY_PATH="$OPENCV_ROOT/lib:$LD_LIBRARY_PATH" - export PATH="$OPENCV_ROOT/bin:$PATH" - - opencv_version - mkdir -p build; cd build; - cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"OpenCV_DIR:PATH=$OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -DCMAKE_BUILD_TYPE:STRING=Release -DAPPIMAGE_BUILD=1 -DUSE_SYSTEM_JSONCPP=0 ../ - make -j 4 @@ -96,9 +95,11 @@ windows-builder-x64: - try { Invoke-WebRequest -Uri "http://gitlab.openshot.org/OpenShot/libopenshot-audio/-/jobs/artifacts/$CI_COMMIT_REF_NAME/download?job=windows-builder-x64" -Headers @{"PRIVATE-TOKEN"="$ACCESS_TOKEN"} -OutFile "artifacts.zip" } catch { $_.Exception.Response.StatusCode.Value__ } - if (-not (Test-Path "artifacts.zip")) { Invoke-WebRequest -Uri 
"http://gitlab.openshot.org/OpenShot/libopenshot-audio/-/jobs/artifacts/develop/download?job=windows-builder-x64" -Headers @{"PRIVATE-TOKEN"="$ACCESS_TOKEN"} -OutFile "artifacts.zip" } - Expand-Archive -Path artifacts.zip -DestinationPath . - - $env:Path = "C:\msys64\mingw64\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path; + - $env:OPENCV_ROOT = "C:\msys64\mingw64\opencv-4.13.0" + - $env:OpenCV_DIR = "$env:OPENCV_ROOT\lib\cmake\opencv4" + - $env:Path = "$env:OPENCV_ROOT\bin;C:\msys64\mingw64\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path; - $env:MSYSTEM = "MINGW64" - - cmake -B build -S . -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw64" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x64" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x64" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" + - cmake -B build -S . -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw64" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x64" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x64" -D"OpenCV_DIR:PATH=$env:OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" - cmake --build build -j 4 - ctest --test-dir build --output-on-failure -VV - cmake --install build @@ -123,9 +124,11 @@ windows-builder-x86: - try { Invoke-WebRequest -Uri "http://gitlab.openshot.org/OpenShot/libopenshot-audio/-/jobs/artifacts/$CI_COMMIT_REF_NAME/download?job=windows-builder-x86" -Headers @{"PRIVATE-TOKEN"="$ACCESS_TOKEN"} -OutFile "artifacts.zip" } catch { $_.Exception.Response.StatusCode.Value__ } - if (-not (Test-Path "artifacts.zip")) { Invoke-WebRequest -Uri "http://gitlab.openshot.org/OpenShot/libopenshot-audio/-/jobs/artifacts/develop/download?job=windows-builder-x86" -Headers @{"PRIVATE-TOKEN"="$ACCESS_TOKEN"} -OutFile "artifacts.zip" } - Expand-Archive -Path artifacts.zip -DestinationPath . - - $env:Path = "C:\msys64\mingw32\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path; + - $env:OPENCV_ROOT = "C:\msys64\mingw32\opencv-4.13.0" + - $env:OpenCV_DIR = "$env:OPENCV_ROOT\lib\cmake\opencv4" + - $env:Path = "$env:OPENCV_ROOT\bin;C:\msys64\mingw32\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path; - $env:MSYSTEM = "MINGW32" - - cmake -B build -S . -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw32" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x86" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x86" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_CXX_FLAGS=-m32" -D"CMAKE_EXE_LINKER_FLAGS=-Wl,--large-address-aware" -D"CMAKE_C_FLAGS=-m32" + - cmake -B build -S . 
@@ -123,9 +124,11 @@ windows-builder-x86:
    - try { Invoke-WebRequest -Uri "http://gitlab.openshot.org/OpenShot/libopenshot-audio/-/jobs/artifacts/$CI_COMMIT_REF_NAME/download?job=windows-builder-x86" -Headers @{"PRIVATE-TOKEN"="$ACCESS_TOKEN"} -OutFile "artifacts.zip" } catch { $_.Exception.Response.StatusCode.Value__ }
     - if (-not (Test-Path "artifacts.zip")) { Invoke-WebRequest -Uri "http://gitlab.openshot.org/OpenShot/libopenshot-audio/-/jobs/artifacts/develop/download?job=windows-builder-x86" -Headers @{"PRIVATE-TOKEN"="$ACCESS_TOKEN"} -OutFile "artifacts.zip" }
     - Expand-Archive -Path artifacts.zip -DestinationPath .
-    - $env:Path = "C:\msys64\mingw32\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path;
+    - $env:OPENCV_ROOT = "C:\msys64\mingw32\opencv-4.13.0"
+    - $env:OpenCV_DIR = "$env:OPENCV_ROOT\lib\cmake\opencv4"
+    - $env:Path = "$env:OPENCV_ROOT\bin;C:\msys64\mingw32\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path;
     - $env:MSYSTEM = "MINGW32"
-    - cmake -B build -S . -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw32" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x86" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x86" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_CXX_FLAGS=-m32" -D"CMAKE_EXE_LINKER_FLAGS=-Wl,--large-address-aware" -D"CMAKE_C_FLAGS=-m32"
+    - cmake -B build -S . -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw32" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x86" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x86" -D"OpenCV_DIR:PATH=$env:OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_CXX_FLAGS=-m32" -D"CMAKE_EXE_LINKER_FLAGS=-Wl,--large-address-aware" -D"CMAKE_C_FLAGS=-m32"
     - cmake --build build -j 4
     - cmake --install build
     - $PROJECT_VERSION = (Select-String -Path "CMakeLists.txt" -Pattern '^set\(PROJECT_VERSION_FULL "(.*)\"' | %{$_.Matches.Groups[1].value})

From f7586af1be12b2f0f57f3a5277e2ad87cefa446a Mon Sep 17 00:00:00 2001
From: Jonathan Thomas
Date: Wed, 13 May 2026 23:20:31 -0500
Subject: [PATCH 09/14] Fall back to the CPU backend for OpenCV when the CUDA
 backend is not available.
---
 src/CVObjectDetection.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp
index fa82eba5d..6ea301054 100644
--- a/src/CVObjectDetection.cpp
+++ b/src/CVObjectDetection.cpp
@@ -71,10 +71,19 @@ std::string CVObjectDetection::ValidateONNXModel(std::string modelPath)
 void CVObjectDetection::setProcessingDevice(){
     if(processingDevice == "GPU"){
-        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
-        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
+        try {
+            const std::vector<cv::dnn::Target> targets = cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_CUDA);
+            if (std::find(targets.begin(), targets.end(), cv::dnn::DNN_TARGET_CUDA) != targets.end()) {
+                net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
+                net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
+                return;
+            }
+        } catch (const cv::Exception&) {
+        }
+        processingDevice = "CPU";
     }
-    else if(processingDevice == "CPU"){
+
+    if(processingDevice == "CPU"){
         net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
         net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
     }
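The availability probe used above can be isolated into a small helper; this sketch mirrors the patch's logic (the function name is illustrative):

    #include <algorithm>
    #include <opencv2/dnn.hpp>

    // True only if this OpenCV build exposes a usable CUDA DNN target.
    bool cuda_target_available() {
        try {
            const auto targets = cv::dnn::getAvailableTargets(cv::dnn::DNN_BACKEND_CUDA);
            return std::find(targets.begin(), targets.end(),
                             cv::dnn::DNN_TARGET_CUDA) != targets.end();
        } catch (const cv::Exception&) {
            return false;  // backend not compiled into this build
        }
    }

Querying getAvailableTargets (and tolerating the exception some builds throw) avoids setting a CUDA backend that would only fail later during the first forward pass.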
-D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"OpenCV_DIR:PATH=$OpenCV_DIR" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk" -D"CMAKE_OSX_DEPLOYMENT_TARGET=10.12" -DCMAKE_PREFIX_PATH=/usr/local/qt5.15.X/qt5.15/5.15.0/clang_64/ -D"CMAKE_INSTALL_RPATH_USE_LINK_PATH=1" -D"ENABLE_RUBY=0" ../ - make -j 9 - make install - ctest --output-on-failure -VV From d1aa207335bc590b110b804ebcbc301eb38c4a6e Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Thu, 14 May 2026 13:14:59 -0500 Subject: [PATCH 11/14] Updating GitLab CI to use parallel builds, dynamic cpu counts, so each build server is using all available cores. --- .gitlab-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a8e497b48..5886c6a4b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -42,7 +42,7 @@ linux-builder: - export PATH="$OPENCV_ROOT/bin:$PATH" - mkdir -p build; cd build; - cmake -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"OpenCV_DIR:PATH=$OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -DCMAKE_BUILD_TYPE:STRING=Release -DAPPIMAGE_BUILD=1 -DUSE_SYSTEM_JSONCPP=0 ../ - - make -j 4 + - cmake --build . --parallel $(nproc) - make install - ctest --output-on-failure -VV - make doc @@ -76,7 +76,7 @@ mac-builder: - export DYLD_LIBRARY_PATH=$OPENCV_ROOT/lib:$DYLD_LIBRARY_PATH - mkdir -p build; cd build; - cmake -DCMAKE_EXE_LINKER_FLAGS="-stdlib=libc++" -DCMAKE_SHARED_LINKER_FLAGS="-stdlib=libc++" -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"OpenCV_DIR:PATH=$OpenCV_DIR" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk" -D"CMAKE_OSX_DEPLOYMENT_TARGET=10.12" -DCMAKE_PREFIX_PATH=/usr/local/qt5.15.X/qt5.15/5.15.0/clang_64/ -D"CMAKE_INSTALL_RPATH_USE_LINK_PATH=1" -D"ENABLE_RUBY=0" ../ - - make -j 9 + - cmake --build . --parallel $(sysctl -n hw.ncpu) - make install - ctest --output-on-failure -VV - PROJECT_VERSION=$(grep -E '^set\(PROJECT_VERSION_FULL "(.*)' ../CMakeLists.txt | awk '{print $2}' | tr -d '")') @@ -104,7 +104,7 @@ windows-builder-x64: - $env:Path = "$env:OPENCV_ROOT\bin;C:\msys64\mingw64\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path; - $env:MSYSTEM = "MINGW64" - cmake -B build -S . -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw64" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x64" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x64" -D"OpenCV_DIR:PATH=$env:OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" - - cmake --build build -j 4 + - cmake --build build --parallel $([Environment]::ProcessorCount) - ctest --test-dir build --output-on-failure -VV - cmake --install build - $PROJECT_VERSION = (Select-String -Path "CMakeLists.txt" -Pattern '^set\(PROJECT_VERSION_FULL "(.*)\"' | %{$_.Matches.Groups[1].value}) @@ -133,7 +133,7 @@ windows-builder-x86: - $env:Path = "$env:OPENCV_ROOT\bin;C:\msys64\mingw32\bin;C:\msys64\usr\bin;C:\msys64\usr\local\bin;" + $env:Path; - $env:MSYSTEM = "MINGW32" - cmake -B build -S . 
-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"babl_DIR=C:/msys64/mingw32" -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR\build\install-x86" -D"OpenShotAudio_ROOT=$CI_PROJECT_DIR\build\install-x86" -D"OpenCV_DIR:PATH=$env:OpenCV_DIR" -D"PYTHON_MODULE_PATH=python" -D"RUBY_MODULE_PATH=ruby" -G "MinGW Makefiles" -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_CXX_FLAGS=-m32" -D"CMAKE_EXE_LINKER_FLAGS=-Wl,--large-address-aware" -D"CMAKE_C_FLAGS=-m32" - - cmake --build build -j 4 + - cmake --build build --parallel $([Environment]::ProcessorCount) - cmake --install build - $PROJECT_VERSION = (Select-String -Path "CMakeLists.txt" -Pattern '^set\(PROJECT_VERSION_FULL "(.*)\"' | %{$_.Matches.Groups[1].value}) - $PROJECT_SO = (Select-String -Path "CMakeLists.txt" -Pattern '^set\(PROJECT_SO_VERSION (.*)\)' | %{$_.Matches.Groups[1].value}) From 30e3ba6b34b0e83f002dc073337538d8de1d1910 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Thu, 14 May 2026 13:20:35 -0500 Subject: [PATCH 12/14] Repair rpath for new OpenCV 4.13.0 version on Mac, during libopenshot build, so it's linked correctly. --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5886c6a4b..bbbdc779b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -78,6 +78,7 @@ mac-builder: - cmake -DCMAKE_EXE_LINKER_FLAGS="-stdlib=libc++" -DCMAKE_SHARED_LINKER_FLAGS="-stdlib=libc++" -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -D"CMAKE_INSTALL_PREFIX:PATH=$CI_PROJECT_DIR/build/install-x64" -D"OpenCV_DIR:PATH=$OpenCV_DIR" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang -D"CMAKE_BUILD_TYPE:STRING=Release" -D"CMAKE_OSX_SYSROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.14.sdk" -D"CMAKE_OSX_DEPLOYMENT_TARGET=10.12" -DCMAKE_PREFIX_PATH=/usr/local/qt5.15.X/qt5.15/5.15.0/clang_64/ -D"CMAKE_INSTALL_RPATH_USE_LINK_PATH=1" -D"ENABLE_RUBY=0" ../ - cmake --build . --parallel $(sysctl -n hw.ncpu) - make install + - find "$CI_PROJECT_DIR/build/install-x64" -type f \( -name "*.dylib" -or -name "*.so" \) -exec sh -c 'for f do if ! otool -l "$f" | grep -q "$OPENCV_ROOT/lib"; then install_name_tool -add_rpath "$OPENCV_ROOT/lib" "$f" || true; fi; done' sh {} + - ctest --output-on-failure -VV - PROJECT_VERSION=$(grep -E '^set\(PROJECT_VERSION_FULL "(.*)' ../CMakeLists.txt | awk '{print $2}' | tr -d '")') - PROJECT_SO=$(grep -E '^set\(PROJECT_SO_VERSION (.*)' ../CMakeLists.txt | awk '{print $2}' | tr -d ')') From 9fbe600244e61f3d6c3e7d553081c56914769866 Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Fri, 15 May 2026 09:15:09 -0500 Subject: [PATCH 13/14] Updated compatibility for CVObjectDetection to support: - YOLOv5: [1, num_boxes, 5 + classes] - YOLO26 no-end2end export: [1, attributes, candidates], e.g. [1,116,8400] -Also kept the YOLOv5-seg safety fix: when a classes file exists, mask coefficients are not treated as extra classes. --- src/CVObjectDetection.cpp | 103 +++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp index 6ea301054..40fbcd40e 100644 --- a/src/CVObjectDetection.cpp +++ b/src/CVObjectDetection.cpp @@ -28,6 +28,56 @@ using google::protobuf::util::TimeUtil; namespace { +bool LooksLikeTransposedYoloOutput(const cv::Mat& out, size_t classCount) +{ + // YOLO26 segmentation exports without end-to-end postprocessing use + // [1, attributes, candidates], e.g. [1, 116, 8400]: + // 4 box channels + class scores + optional mask coefficients. 
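+ // For example, a COCO-style seg export with 80 classes and 32 mask + // coefficients yields 4 + 80 + 32 = 116 attributes per candidate.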
+ return out.dims == 3 && out.size[0] == 1 && out.size[1] >= 4 && + out.size[2] > out.size[1] && + (classCount == 0 || out.size[1] >= 4 + static_cast<int>(classCount)); +} + +cv::Rect ScaledXYWHBox( + float centerX, + float centerY, + float width, + float height, + const cv::Size& frameDims, + int inputWidth, + int inputHeight) +{ + if (centerX <= 1.0f && centerY <= 1.0f && width <= 1.0f && height <= 1.0f) { + centerX *= static_cast<float>(frameDims.width); + width *= static_cast<float>(frameDims.width); + centerY *= static_cast<float>(frameDims.height); + height *= static_cast<float>(frameDims.height); + } else { + const float xFactor = static_cast<float>(frameDims.width) / static_cast<float>(inputWidth); + const float yFactor = static_cast<float>(frameDims.height) / static_cast<float>(inputHeight); + centerX *= xFactor; + width *= xFactor; + centerY *= yFactor; + height *= yFactor; + } + + float left = centerX - width / 2.0f; + float top = centerY - height / 2.0f; + float right = centerX + width / 2.0f; + float bottom = centerY + height / 2.0f; + + left = std::max(0.0f, std::min(left, static_cast<float>(frameDims.width - 1))); + top = std::max(0.0f, std::min(top, static_cast<float>(frameDims.height - 1))); + right = std::max(0.0f, std::min(right, static_cast<float>(frameDims.width))); + bottom = std::max(0.0f, std::min(bottom, static_cast<float>(frameDims.height))); + + return cv::Rect( + static_cast<int>(left), + static_cast<int>(top), + std::max(0, static_cast<int>(right - left)), + std::max(0, static_cast<int>(bottom - top))); +} + std::string LoadONNXModel(std::string modelPath, cv::dnn::Net *net) { #if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3) @@ -205,6 +255,53 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector for (size_t i = 0; i < outs.size(); ++i) { cv::Mat det = outs[i]; + if (LooksLikeTransposedYoloOutput(det, classNames.size())) { + const int attributes = det.size[1]; + const int candidates = det.size[2]; + const int classCount = !classNames.empty() + ? static_cast<int>(classNames.size()) + : attributes - 4; + const float* data = reinterpret_cast<const float*>(det.data); + + for (int candidateIndex = 0; candidateIndex < candidates; ++candidateIndex) { + std::vector<ClassScore> rowClassScores; + rowClassScores.reserve(maxClassCandidates); + + for (int classIndex = 0; classIndex < classCount; ++classIndex) { + const float classConfidence = data[(4 + classIndex) * candidates + candidateIndex]; + if (rowClassScores.size() < static_cast<size_t>(maxClassCandidates)) { + rowClassScores.emplace_back(classIndex, classConfidence); + std::sort(rowClassScores.begin(), rowClassScores.end(), + [](const ClassScore& a, const ClassScore& b) { return a.score > b.score; }); + } else if (classConfidence > rowClassScores.back().score) { + rowClassScores.back() = ClassScore(classIndex, classConfidence); + std::sort(rowClassScores.begin(), rowClassScores.end(), + [](const ClassScore& a, const ClassScore& b) { return a.score > b.score; }); + } + } + + if (rowClassScores.empty() || rowClassScores.front().score <= confThreshold) { + continue; + } + + cv::Rect box = ScaledXYWHBox( + data[candidateIndex], + data[candidates + candidateIndex], + data[2 * candidates + candidateIndex], + data[3 * candidates + candidateIndex], + frameDims, inpWidth, inpHeight); + if (box.width <= 0 || box.height <= 0) { + continue; + } + + classIds.push_back(rowClassScores.front().classId); + confidences.push_back(rowClassScores.front().score); + boxes.push_back(box); + detectionClassScores.push_back(rowClassScores); + } + continue; + } + // YOLOv5 ONNX output is usually [1, num_boxes, num_classes + 5]. 
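+ // Each row is (cx, cy, w, h, objectness) followed by one score per class, + // so the 3-D blob is flattened to 2-D rows before scanning.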
if (det.dims == 3) { det = det.reshape(1, det.size[1]); } @@ -220,7 +317,11 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector for (int j = 0; j < det.rows; ++j, data += det.cols) { std::vector<ClassScore> rowClassScores; rowClassScores.reserve(maxClassCandidates); - for (int classIndex = 5; classIndex < det.cols; ++classIndex) { + int classScoresEnd = det.cols; + if (!classNames.empty()) { + classScoresEnd = std::min(det.cols, 5 + static_cast<int>(classNames.size())); + } + for (int classIndex = 5; classIndex < classScoresEnd; ++classIndex) { const float classConfidence = data[classIndex] * data[4]; if (rowClassScores.size() < static_cast<size_t>(maxClassCandidates)) { rowClassScores.emplace_back(classIndex - 5, classConfidence); From ec370404d9989ce2f135b51db1449d1937344dee Mon Sep 17 00:00:00 2001 From: Jonathan Thomas Date: Fri, 15 May 2026 17:51:16 -0500 Subject: [PATCH 14/14] Add YOLO segmentation mask support to Object Detection - Support YOLO segmentation ONNX outputs, persist sparse RLE masks in object-detection protobuf data, and render configurable mask overlays in the Object Detection effect. - Also normalize tracked object classes at save time, smooth short mask gaps, and expose Draw Mask / Mask Color / Mask Alpha only when mask data is available. --- src/CVObjectDetection.cpp | 287 +++++++++++++++++++++++++++++++- src/CVObjectDetection.h | 25 ++- src/TrackedObjectBBox.cpp | 159 +++++++++++++++++- src/TrackedObjectBBox.h | 16 ++ src/TrackedObjectBase.cpp | 2 +- src/TrackedObjectBase.h | 3 + src/effects/ObjectDetection.cpp | 99 ++++++++++- src/effects/ObjectDetection.h | 6 +- src/objdetectdata.proto | 8 + tests/EffectMask.cpp | 201 ++++++++++++++++++++++ 10 files changed, 787 insertions(+), 19 deletions(-) diff --git a/src/CVObjectDetection.cpp b/src/CVObjectDetection.cpp index 40fbcd40e..dda0528b7 100644 --- a/src/CVObjectDetection.cpp +++ b/src/CVObjectDetection.cpp @@ -78,10 +78,146 @@ cv::Rect ScaledXYWHBox( std::max(0, static_cast<int>(bottom - top))); +std::vector<uint32_t> EncodeBinaryMaskRLE(const std::vector<uint8_t>& mask) +{ + std::vector<uint32_t> rle; + if (mask.empty()) + return rle; + + uint8_t current = 0; + uint32_t count = 0; + for (uint8_t value : mask) {
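+ // Collapse any nonzero byte to 1 so the run comparison below stays exact. + value = value ? 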
1 : 0; + if (value == current) { + ++count; + } else { + rle.push_back(count); + current = value; + count = 1; + } + } + rle.push_back(count); + return rle; +} + +cv::Mat DecodeBinaryMaskRLE(const CVObjectMaskData& mask) +{ + cv::Mat image(mask.height, mask.width, CV_8UC1, cv::Scalar(0)); + if (!mask.HasData()) + return image; + + const int total = mask.width * mask.height; + int offset = 0; + bool value = false; + uint8_t* data = image.ptr(); + for (uint32_t count : mask.rle) { + const int end = std::min(total, offset + static_cast(count)); + if (value) { + std::fill(data + offset, data + end, static_cast(1)); + } + offset = end; + value = !value; + if (offset >= total) + break; + } + return image; +} + +CVObjectMaskData TransformMaskToBox( + const CVObjectMaskData& sourceMask, + const cv::Rect_& sourceBox, + const cv::Rect_& targetBox, + const cv::Size& frameDims) +{ + CVObjectMaskData result; + if (!sourceMask.HasData() || sourceBox.width <= 0.0f || sourceBox.height <= 0.0f || + targetBox.width <= 0.0f || targetBox.height <= 0.0f || + frameDims.width <= 0 || frameDims.height <= 0) { + return result; + } + + const float scaleX = sourceMask.width / static_cast(frameDims.width); + const float scaleY = sourceMask.height / static_cast(frameDims.height); + const cv::Rect_ sourceMaskBox( + sourceBox.x * scaleX, + sourceBox.y * scaleY, + sourceBox.width * scaleX, + sourceBox.height * scaleY); + const cv::Rect_ targetMaskBox( + targetBox.x * scaleX, + targetBox.y * scaleY, + targetBox.width * scaleX, + targetBox.height * scaleY); + if (sourceMaskBox.width <= 0.0f || sourceMaskBox.height <= 0.0f) + return result; + + const double xScale = targetMaskBox.width / sourceMaskBox.width; + const double yScale = targetMaskBox.height / sourceMaskBox.height; + cv::Mat transform = (cv::Mat_(2, 3) << + xScale, 0.0, targetMaskBox.x - xScale * sourceMaskBox.x, + 0.0, yScale, targetMaskBox.y - yScale * sourceMaskBox.y); + + cv::Mat source = DecodeBinaryMaskRLE(sourceMask); + cv::Mat transformed; + cv::warpAffine( + source, transformed, transform, source.size(), + cv::INTER_NEAREST, cv::BORDER_CONSTANT, cv::Scalar(0)); + if (cv::countNonZero(transformed) == 0) + return result; + + result.width = sourceMask.width; + result.height = sourceMask.height; + result.rle = EncodeBinaryMaskRLE( + std::vector(transformed.data, transformed.data + transformed.total())); + return result; +} + +CVObjectMaskData BuildMaskFromPrototype( + const cv::Mat& prototype, + const std::vector& coefficients, + const cv::Rect& box, + const cv::Size& frameDims) +{ + CVObjectMaskData result; + if (prototype.dims != 4 || prototype.size[0] != 1 || + prototype.size[1] != static_cast(coefficients.size())) + return result; + + const int channels = prototype.size[1]; + const int maskHeight = prototype.size[2]; + const int maskWidth = prototype.size[3]; + const int maskPixels = maskWidth * maskHeight; + const float* protoData = reinterpret_cast(prototype.data); + + const int left = std::max(0, static_cast(box.x * maskWidth / static_cast(frameDims.width))); + const int top = std::max(0, static_cast(box.y * maskHeight / static_cast(frameDims.height))); + const int right = std::min(maskWidth, static_cast((box.x + box.width) * maskWidth / static_cast(frameDims.width))); + const int bottom = std::min(maskHeight, static_cast((box.y + box.height) * maskHeight / static_cast(frameDims.height))); + if (left >= right || top >= bottom) + return result; + + std::vector binary(maskPixels, 0); + for (int y = top; y < bottom; ++y) { + for (int x = left; x < right; 
++x) { + const int pixel = y * maskWidth + x; + float value = 0.0f; + for (int channel = 0; channel < channels; ++channel) { + value += coefficients[channel] * protoData[channel * maskPixels + pixel]; + } + binary[pixel] = value > 0.0f ? 1 : 0; + } + } + + result.width = maskWidth; + result.height = maskHeight; + result.rle = EncodeBinaryMaskRLE(binary); + return result; +} + std::string LoadONNXModel(std::string modelPath, cv::dnn::Net *net) { #if CV_VERSION_MAJOR < 4 || (CV_VERSION_MAJOR == 4 && CV_VERSION_MINOR < 3) - return std::string("Failed to load ONNX model: YOLOv5 requires OpenCV 4.3.0 or newer. " + return std::string("Failed to load ONNX model: YOLO requires OpenCV 4.3.0 or newer. " "This OpenCV build is ") + CV_VERSION + "."; #else try { @@ -108,8 +244,8 @@ std::string LoadONNXModel(std::string modelPath, cv::dnn::Net *net) } CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController) -: processingController(&processingController), processingDevice("CPU"), inpWidth(640), inpHeight(640){ - confThreshold = 0.25; +: processingController(&processingController), processingDevice("CPU"), inpWidth(640), inpHeight(640), generateMasks(true){ + confThreshold = 0.10; nmsThreshold = 0.1; SetJson(processInfoJson); } @@ -153,7 +289,7 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, processingController->SetError(false, ""); if(modelPath.empty()) { - processingController->SetError(true, "Missing path to YOLOv5 ONNX model file"); + processingController->SetError(true, "Missing path to YOLO ONNX model file"); error = true; return; } @@ -165,7 +301,7 @@ void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, std::ifstream model_file(modelPath); if(!model_file.good()){ - processingController->SetError(true, "Incorrect path to YOLOv5 ONNX model file"); + processingController->SetError(true, "Incorrect path to YOLO ONNX model file"); error = true; return; } @@ -249,6 +385,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector std::vector confidences; std::vector boxes; std::vector> detectionClassScores; + std::vector detectionMasks; std::vector objectIds; const int maxClassCandidates = 5; @@ -261,6 +398,16 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector const int classCount = !classNames.empty() ? 
static_cast(classNames.size()) : attributes - 4; + const int maskCoefficientCount = attributes - 4 - classCount; + const cv::Mat* prototype = nullptr; + if (generateMasks && maskCoefficientCount > 0) { + for (const auto& out : outs) { + if (out.dims == 4 && out.size[0] == 1 && out.size[1] == maskCoefficientCount) { + prototype = &out; + break; + } + } + } const float* data = reinterpret_cast(det.data); for (int candidateIndex = 0; candidateIndex < candidates; ++candidateIndex) { @@ -298,11 +445,21 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector confidences.push_back(rowClassScores.front().score); boxes.push_back(box); detectionClassScores.push_back(rowClassScores); + if (prototype) { + std::vector coefficients; + coefficients.reserve(maskCoefficientCount); + for (int coefficientIndex = 0; coefficientIndex < maskCoefficientCount; ++coefficientIndex) { + coefficients.push_back(data[(4 + classCount + coefficientIndex) * candidates + candidateIndex]); + } + detectionMasks.push_back(BuildMaskFromPrototype(*prototype, coefficients, box, frameDims)); + } else { + detectionMasks.push_back({}); + } } continue; } - // YOLOv5 ONNX output is usually [1, num_boxes, num_classes + 5]. + // YOLOv5-style ONNX output is usually [1, num_boxes, num_classes + 5]. if (det.dims == 3) { det = det.reshape(1, det.size[1]); } @@ -364,6 +521,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector confidences.push_back(confidence); boxes.push_back(cv::Rect(left, top, width, height)); detectionClassScores.push_back(rowClassScores); + detectionMasks.push_back({}); } } } @@ -378,16 +536,19 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector std::vector sortConfidences; std::vector sortClassIds; std::vector> sortClassScores; + std::vector sortMasks; for(auto index : indices) { sortBoxes.push_back(boxes[index]); sortConfidences.push_back(confidences[index]); sortClassIds.push_back(classIds[index]); sortClassScores.push_back(detectionClassScores[index]); + sortMasks.push_back(index < static_cast(detectionMasks.size()) ? 
detectionMasks[index] : CVObjectMaskData()); } sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), sortConfidences, sortClassIds, sortClassScores); // Clear data vectors boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear(); + std::vector masks; // Get SORT predicted boxes for(auto TBox : sort.frameTrackingResult){ if(TBox.frame == frameId){ @@ -395,6 +556,35 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector confidences.push_back(TBox.confidence); classIds.push_back(TBox.classId); objectIds.push_back(TBox.id); + CVObjectMaskData mask; + double bestIoU = 0.0; + for (size_t maskIndex = 0; maskIndex < sortMasks.size(); ++maskIndex) { + if (!sortMasks[maskIndex].HasData() || sortClassIds[maskIndex] != TBox.classId) + continue; + double score = SortTracker::GetIOU(cv::Rect_(sortBoxes[maskIndex]), TBox.box); + if (score > bestIoU) { + bestIoU = score; + mask = sortMasks[maskIndex]; + } + } + if (mask.HasData()) { + recentObjectMasks[TBox.id] = CVTrackedMaskData{frameId, mask, TBox.box}; + } else { + const auto recentMask = recentObjectMasks.find(TBox.id); + if (recentMask != recentObjectMasks.end() && + frameId > recentMask->second.frameId && + frameId - recentMask->second.frameId <= 5) { + mask = TransformMaskToBox( + recentMask->second.mask, + recentMask->second.box, + TBox.box, + frameDims); + if (mask.HasData()) { + recentObjectMasks[TBox.id] = CVTrackedMaskData{frameId, mask, TBox.box}; + } + } + } + masks.push_back(mask); } } @@ -411,6 +601,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); objectIds.erase(objectIds.begin() + j); + masks.erase(masks.begin() + j); break; } else{ @@ -418,6 +609,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); objectIds.erase(objectIds.begin() + i); + masks.erase(masks.begin() + i); i = 0; break; } @@ -437,6 +629,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector classIds.erase(classIds.begin() + j); confidences.erase(confidences.begin() + j); objectIds.erase(objectIds.begin() + j); + masks.erase(masks.begin() + j); break; } else{ @@ -444,6 +637,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector classIds.erase(classIds.begin() + i); confidences.erase(confidences.begin() + i); objectIds.erase(objectIds.begin() + i); + masks.erase(masks.begin() + i); i = 0; break; } @@ -463,7 +657,7 @@ void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector normalized_boxes.push_back(normalized_box); } - detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds); + detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds, masks); } // Compute IOU between 2 boxes @@ -518,12 +712,65 @@ CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){ } } +void CVObjectDetection::NormalizeTrackedClasses() +{ + struct ClassEvidence { + float confidenceSum = 0.0f; + size_t count = 0; + }; + + std::map> objectClassEvidence; + for (const auto& frameData : detectionsData) { + const CVDetectionData& detections = frameData.second; + const size_t detectionCount = std::min(detections.objectIds.size(), detections.classIds.size()); + for (size_t i = 0; i < detectionCount; ++i) { + const 
float confidence = i < detections.confidences.size() ? detections.confidences[i] : 1.0f; + ClassEvidence& evidence = objectClassEvidence[detections.objectIds[i]][detections.classIds[i]]; + evidence.confidenceSum += confidence; + ++evidence.count; + } + } + + std::map dominantClassByObject; + for (const auto& objectEvidence : objectClassEvidence) { + const int objectId = objectEvidence.first; + int bestClassId = -1; + ClassEvidence bestEvidence; + for (const auto& classEvidence : objectEvidence.second) { + const int classId = classEvidence.first; + const ClassEvidence& evidence = classEvidence.second; + if (bestClassId < 0 || + evidence.confidenceSum > bestEvidence.confidenceSum || + (evidence.confidenceSum == bestEvidence.confidenceSum && evidence.count > bestEvidence.count)) { + bestClassId = classId; + bestEvidence = evidence; + } + } + if (bestClassId >= 0) { + dominantClassByObject[objectId] = bestClassId; + } + } + + for (auto& frameData : detectionsData) { + CVDetectionData& detections = frameData.second; + const size_t detectionCount = std::min(detections.objectIds.size(), detections.classIds.size()); + for (size_t i = 0; i < detectionCount; ++i) { + const auto dominantClass = dominantClassByObject.find(detections.objectIds[i]); + if (dominantClass != dominantClassByObject.end()) { + detections.classIds[i] = dominantClass->second; + } + } + } +} + bool CVObjectDetection::SaveObjDetectedData(){ if(protobuf_data_path.empty()) { cerr << "Missing path to object detection protobuf data file." << endl; return false; } + NormalizeTrackedClasses(); + // Create tracker message pb_objdetect::ObjDetect objMessage; @@ -576,6 +823,14 @@ void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CV box->set_confidence(dData.confidences.at(i)); box->set_objectid(dData.objectIds.at(i)); + if (i < dData.masks.size() && dData.masks.at(i).HasData()) { + pb_objdetect::Frame_Box_Mask* mask = box->mutable_mask(); + mask->set_width(dData.masks.at(i).width); + mask->set_height(dData.masks.at(i).height); + for (uint32_t count : dData.masks.at(i).rle) { + mask->add_rle(count); + } + } } } @@ -647,6 +902,12 @@ void CVObjectDetection::SetJsonValue(const Json::Value root) { if (!root["nms_threshold"].isNull()){ nmsThreshold = root["nms_threshold"].asFloat(); } + if (!root["generate-masks"].isNull()){ + generateMasks = root["generate-masks"].asBool(); + } + if (!root["generate_masks"].isNull()){ + generateMasks = root["generate_masks"].asBool(); + } } /* @@ -698,6 +959,7 @@ bool CVObjectDetection::_LoadObjDetectdData(){ std::vector confidences; std::vector> boxes; std::vector objectIds; + std::vector masks; for(int i = 0; i < pbFrameData.bounding_box_size(); i++){ // Get bounding box coordinates @@ -714,10 +976,19 @@ bool CVObjectDetection::_LoadObjDetectdData(){ // Push back data into vectors boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence); objectIds.push_back(objectId); + CVObjectMaskData mask; + if (pBox.Get(i).has_mask()) { + mask.width = pBox.Get(i).mask().width(); + mask.height = pBox.Get(i).mask().height(); + for (int rleIndex = 0; rleIndex < pBox.Get(i).mask().rle_size(); ++rleIndex) { + mask.rle.push_back(pBox.Get(i).mask().rle(rleIndex)); + } + } + masks.push_back(mask); } // Assign data to object detector map - detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds); + detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds, masks); } // Delete all global objects allocated by libprotobuf. 
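The sparse mask format used throughout this patch is a simple alternating run-length encoding. The standalone sketch below (not part of the patch itself) illustrates the scheme: EncodeRLE and DecodeRLE are illustrative stand-ins mirroring the shape of the EncodeBinaryMaskRLE/DecodeBinaryMaskRLE helpers added above, and the {0, 1, 3} values match the 2x2 test mask used later in tests/EffectMask.cpp.

#include <cassert>
#include <cstdint>
#include <vector>

// Counts alternate zero-run / one-run, always starting with the zero-run
// (which is 0 when the very first pixel is foreground).
static std::vector<uint32_t> EncodeRLE(const std::vector<uint8_t>& mask) {
    std::vector<uint32_t> rle;
    if (mask.empty()) return rle;
    uint8_t current = 0;
    uint32_t count = 0;
    for (uint8_t v : mask) {
        v = v ? 1 : 0;                  // collapse any nonzero byte to 1
        if (v == current) { ++count; }
        else { rle.push_back(count); current = v; count = 1; }
    }
    rle.push_back(count);               // flush the final run
    return rle;
}

static std::vector<uint8_t> DecodeRLE(const std::vector<uint32_t>& rle, size_t total) {
    std::vector<uint8_t> mask(total, 0);
    size_t offset = 0;
    bool value = false;                 // runs start with background
    for (uint32_t count : rle) {
        for (uint32_t i = 0; i < count && offset < total; ++i, ++offset)
            mask[offset] = value ? 1 : 0;
        value = !value;
    }
    return mask;
}

int main() {
    // 2x2 mask with only the first pixel set, as in the new unit tests.
    const std::vector<uint8_t> mask = {1, 0, 0, 0};
    const std::vector<uint32_t> rle = EncodeRLE(mask);
    assert((rle == std::vector<uint32_t>{0, 1, 3}));
    assert(DecodeRLE(rle, mask.size()) == mask);
    return 0;
}

Because the counts always begin with the zero-run, a mask that starts with foreground pixels emits a leading 0, which is exactly what the test fixtures encode.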
diff --git a/src/CVObjectDetection.h b/src/CVObjectDetection.h index d993817ee..50cc59ec3 100644 --- a/src/CVObjectDetection.h +++ b/src/CVObjectDetection.h @@ -33,6 +33,20 @@ namespace pb_objdetect { namespace openshot { + struct CVObjectMaskData{ + int width = 0; + int height = 0; + std::vector rle; + + bool HasData() const { return width > 0 && height > 0 && !rle.empty(); } + }; + + struct CVTrackedMaskData{ + size_t frameId = 0; + CVObjectMaskData mask; + cv::Rect_ box; + }; + // Stores the detected object bounding boxes and its properties. struct CVDetectionData{ CVDetectionData(){} @@ -41,25 +55,28 @@ namespace openshot std::vector _confidences, std::vector> _boxes, size_t _frameId, - std::vector _objectIds) + std::vector _objectIds, + std::vector _masks = {}) { classIds = _classIds; confidences = _confidences; boxes = _boxes; frameId = _frameId; objectIds = _objectIds; + masks = _masks; } size_t frameId; std::vector classIds; std::vector confidences; std::vector> boxes; std::vector objectIds; + std::vector masks; }; /** * @brief This class runs trought a clip to detect objects and returns the bounding boxes and its properties. * - * Object detection is performed using a YOLOv5 ONNX model with OpenCV DNN module. + * Object detection is performed using a supported YOLO ONNX model with OpenCV DNN module. */ class CVObjectDetection{ @@ -75,8 +92,10 @@ namespace openshot std::string protobuf_data_path; int inpWidth; int inpHeight; + bool generateMasks; SortTracker sort; + std::map recentObjectMasks; uint progress; @@ -98,6 +117,8 @@ namespace openshot // Remove the bounding boxes with low confidence using non-maxima suppression void postprocess(const cv::Size &frameDims, const std::vector& out, size_t frame_number); + void NormalizeTrackedClasses(); + // Get the names of the output layers std::vector getOutputsNames(const cv::dnn::Net& net); diff --git a/src/TrackedObjectBBox.cpp b/src/TrackedObjectBBox.cpp index 80c77f251..bbd566a65 100644 --- a/src/TrackedObjectBBox.cpp +++ b/src/TrackedObjectBBox.cpp @@ -27,6 +27,93 @@ using google::protobuf::util::TimeUtil; using namespace openshot; +namespace { +std::vector encode_object_mask_rle(const std::vector& mask) +{ + std::vector rle; + uint8_t current = 0; + uint32_t count = 0; + for (uint8_t value : mask) { + value = value ? 
1 : 0; + if (value == current) { + ++count; + } else { + rle.push_back(count); + current = value; + count = 1; + } + } + rle.push_back(count); + return rle; +} + +std::vector decode_object_mask_rle(const ObjectMaskData& mask) +{ + std::vector decoded(static_cast(mask.width * mask.height), 0); + int offset = 0; + bool value = false; + for (uint32_t count : mask.rle) { + const int end = std::min(mask.width * mask.height, offset + static_cast(count)); + if (value) + std::fill(decoded.begin() + offset, decoded.begin() + end, static_cast(1)); + offset = end; + value = !value; + if (offset >= mask.width * mask.height) + break; + } + return decoded; +} + +ObjectMaskData transform_mask_between_boxes( + const ObjectMaskData& source_mask, + const BBox& source_box, + const BBox& target_box) +{ + ObjectMaskData result; + if (!source_mask.HasData() || + source_box.width <= 0.0f || source_box.height <= 0.0f || + target_box.width <= 0.0f || target_box.height <= 0.0f) + return result; + + const float source_left = (source_box.cx - source_box.width / 2.0f) * source_mask.width; + const float source_top = (source_box.cy - source_box.height / 2.0f) * source_mask.height; + const float source_width = source_box.width * source_mask.width; + const float source_height = source_box.height * source_mask.height; + const float target_left = (target_box.cx - target_box.width / 2.0f) * source_mask.width; + const float target_top = (target_box.cy - target_box.height / 2.0f) * source_mask.height; + const float target_width = target_box.width * source_mask.width; + const float target_height = target_box.height * source_mask.height; + if (source_width <= 0.0f || source_height <= 0.0f || target_width <= 0.0f || target_height <= 0.0f) + return result; + + const std::vector source = decode_object_mask_rle(source_mask); + std::vector transformed(static_cast(source_mask.width * source_mask.height), 0); + const int min_x = std::max(0, static_cast(std::floor(target_left))); + const int min_y = std::max(0, static_cast(std::floor(target_top))); + const int max_x = std::min(source_mask.width, static_cast(std::ceil(target_left + target_width))); + const int max_y = std::min(source_mask.height, static_cast(std::ceil(target_top + target_height))); + for (int y = min_y; y < max_y; ++y) { + for (int x = min_x; x < max_x; ++x) { + const float source_x = source_left + (static_cast(x) - target_left) * source_width / target_width; + const float source_y = source_top + (static_cast(y) - target_top) * source_height / target_height; + const int sx = static_cast(std::round(source_x)); + const int sy = static_cast(std::round(source_y)); + if (sx < 0 || sx >= source_mask.width || sy < 0 || sy >= source_mask.height) + continue; + if (source[static_cast(sy * source_mask.width + sx)]) + transformed[static_cast(y * source_mask.width + x)] = 1; + } + } + + if (std::none_of(transformed.begin(), transformed.end(), [](uint8_t value) { return value != 0; })) + return result; + result.width = source_mask.width; + result.height = source_mask.height; + result.rle = encode_object_mask_rle(transformed); + return result; +} +} + // Default Constructor, delegating TrackedObjectBBox::TrackedObjectBBox() : TrackedObjectBBox::TrackedObjectBBox(0, 0, 255, 255) {} @@ -37,9 +124,11 @@ TrackedObjectBBox::TrackedObjectBBox(int Red, int Green, int Blue, int Alfa) : delta_x(0.0), delta_y(0.0), scale_x(1.0), scale_y(1.0), background_alpha(0.0), background_corner(12), + mask_alpha(120.0 / 255.0), stroke_width(2) , stroke_alpha(0.7), stroke(Red, Green, Blue, Alfa), - 
background(Red, Green, Blue, Alfa) + background(Red, Green, Blue, Alfa), + mask_color(Red, Green, Blue, Alfa) { this->TimeScale = 1.0; } @@ -71,6 +160,50 @@ void TrackedObjectBBox::AddBox(int64_t _frame_num, float _cx, float _cy, float _ } } +void TrackedObjectBBox::AddMask(int64_t frame_num, const ObjectMaskData& mask) +{ + if (frame_num < 0 || !mask.HasData()) + return; + + double time = FrameNToTime(frame_num, 1.0); + MaskVec[time] = mask; +} + +bool TrackedObjectBBox::HasMask(int64_t frame_num, int64_t max_frame_gap) const +{ + return GetMask(frame_num, max_frame_gap).HasData(); +} + +bool TrackedObjectBBox::HasMaskData() const +{ + return !MaskVec.empty(); +} + +ObjectMaskData TrackedObjectBBox::GetMask(int64_t frame_num, int64_t max_frame_gap) const +{ + double time = FrameNToTime(frame_num, 1.0); + auto it = MaskVec.find(time); + if (it != MaskVec.end()) + return it->second; + if (max_frame_gap <= 0 || MaskVec.empty()) + return {}; + + auto after = MaskVec.lower_bound(time); + if (after == MaskVec.begin()) + return {}; + + auto before = std::prev(after); + double max_gap_time = FrameNToTime(max_frame_gap, 1.0) - FrameNToTime(0, 1.0); + if (time - before->first <= max_gap_time + 0.000001) { + auto source_box = BoxVec.find(before->first); + auto target_box = BoxVec.find(time); + if (source_box != BoxVec.end() && target_box != BoxVec.end()) + return transform_mask_between_boxes(before->second, source_box->second, target_box->second); + return before->second; + } + return {}; +} + // Get the size of BoxVec map int64_t TrackedObjectBBox::GetLength() const { @@ -334,6 +467,7 @@ bool TrackedObjectBBox::LoadBoxData(std::string inputFilePath) void TrackedObjectBBox::clear() { BoxVec.clear(); + MaskVec.clear(); } // Generate JSON string of this object @@ -363,6 +497,11 @@ Json::Value TrackedObjectBBox::JsonValue() const root["visible"] = visible.JsonValue(); root["draw_box"] = draw_box.JsonValue(); root["draw_text"] = draw_text.JsonValue(); + if (!MaskVec.empty()) { + root["draw_mask"] = draw_mask.JsonValue(); + root["mask_alpha"] = mask_alpha.JsonValue(); + root["mask_color"] = mask_color.JsonValue(); + } root["stroke"] = stroke.JsonValue(); root["background_alpha"] = background_alpha.JsonValue(); root["background_corner"] = background_corner.JsonValue(); @@ -433,6 +572,12 @@ void TrackedObjectBBox::SetJsonValue(const Json::Value root) draw_box.SetJsonValue(root["draw_box"]); if (!root["draw_text"].isNull()) draw_text.SetJsonValue(root["draw_text"]); + if (!root["draw_mask"].isNull()) + draw_mask.SetJsonValue(root["draw_mask"]); + if (!root["mask_alpha"].isNull()) + mask_alpha.SetJsonValue(root["mask_alpha"]); + if (!root["mask_color"].isNull()) + mask_color.SetJsonValue(root["mask_color"]); if (!root["stroke"].isNull()) stroke.SetJsonValue(root["stroke"]); if (!root["background_alpha"].isNull()) @@ -480,6 +625,18 @@ Json::Value TrackedObjectBBox::PropertiesJSON(int64_t requested_frame) const root["draw_text"]["choices"].append(add_property_choice_json("Yes", true, draw_text.GetValue(requested_frame))); root["draw_text"]["choices"].append(add_property_choice_json("No", false, draw_text.GetValue(requested_frame))); + if (HasMaskData()) { + root["draw_mask"] = add_property_json("Draw Mask", draw_mask.GetValue(requested_frame), "int", "", &draw_mask, 0, 1, false, requested_frame); + root["draw_mask"]["choices"].append(add_property_choice_json("Yes", true, draw_mask.GetValue(requested_frame))); + root["draw_mask"]["choices"].append(add_property_choice_json("No", false, 
draw_mask.GetValue(requested_frame))); + + root["mask_color"] = add_property_json("Mask Color", 0.0, "color", "", NULL, 0, 255, false, requested_frame); + root["mask_color"]["red"] = add_property_json("Red", mask_color.red.GetValue(requested_frame), "float", "", &mask_color.red, 0, 255, false, requested_frame); + root["mask_color"]["blue"] = add_property_json("Blue", mask_color.blue.GetValue(requested_frame), "float", "", &mask_color.blue, 0, 255, false, requested_frame); + root["mask_color"]["green"] = add_property_json("Green", mask_color.green.GetValue(requested_frame), "float", "", &mask_color.green, 0, 255, false, requested_frame); + root["mask_alpha"] = add_property_json("Mask Alpha", mask_alpha.GetValue(requested_frame), "float", "", &mask_alpha, 0.0, 1.0, false, requested_frame); + } + root["stroke"] = add_property_json("Border", 0.0, "color", "", NULL, 0, 255, false, requested_frame); root["stroke"]["red"] = add_property_json("Red", stroke.red.GetValue(requested_frame), "float", "", &stroke.red, 0, 255, false, requested_frame); root["stroke"]["blue"] = add_property_json("Blue", stroke.blue.GetValue(requested_frame), "float", "", &stroke.blue, 0, 255, false, requested_frame); diff --git a/src/TrackedObjectBBox.h b/src/TrackedObjectBBox.h index b250c15a2..64375367c 100644 --- a/src/TrackedObjectBBox.h +++ b/src/TrackedObjectBBox.h @@ -24,6 +24,15 @@ namespace openshot { + struct ObjectMaskData + { + int width = 0; + int height = 0; + std::vector rle; + + bool HasData() const { return width > 0 && height > 0 && !rle.empty(); } + }; + /** * @brief This struct holds the information of a bounding-box. * @@ -135,16 +144,19 @@ namespace openshot public: std::map BoxVec; ///< Index the bounding-box by time of each frame + std::map MaskVec; ///< Index optional object masks by time of each frame Keyframe delta_x; ///< X-direction displacement Keyframe Keyframe delta_y; ///< Y-direction displacement Keyframe Keyframe scale_x; ///< X-direction scale Keyframe Keyframe scale_y; ///< Y-direction scale Keyframe Keyframe background_alpha; ///< Background box opacity Keyframe background_corner; ///< Radius of rounded corners + Keyframe mask_alpha; ///< Object mask overlay opacity Keyframe stroke_width; ///< Thickness of border line Keyframe stroke_alpha; ///< Stroke box opacity Color stroke; ///< Border line color Color background; ///< Background fill color + Color mask_color; ///< Object mask overlay color std::string protobufDataPath; ///< Path to the protobuf file that holds the bounding box points across the frames @@ -154,6 +166,10 @@ namespace openshot /// Add a BBox to the BoxVec map void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override; + void AddMask(int64_t frame_num, const ObjectMaskData& mask); + bool HasMask(int64_t frame_number, int64_t max_frame_gap = 0) const; + bool HasMaskData() const; + ObjectMaskData GetMask(int64_t frame_number, int64_t max_frame_gap = 0) const; /// Update object's BaseFps void SetBaseFPS(Fraction fps); diff --git a/src/TrackedObjectBase.cpp b/src/TrackedObjectBase.cpp index 818cb8686..2fd4bec2c 100644 --- a/src/TrackedObjectBase.cpp +++ b/src/TrackedObjectBase.cpp @@ -23,7 +23,7 @@ namespace openshot // Constructor TrackedObjectBase::TrackedObjectBase(std::string _id) - : visible(1.0), draw_box(1), draw_text(1), id(_id) {} + : visible(1.0), draw_box(1), draw_text(1), draw_mask(1), id(_id) {} Json::Value TrackedObjectBase::add_property_choice_json( std::string name, int value, int selected_value) const diff 
--git a/src/TrackedObjectBase.h b/src/TrackedObjectBase.h index f10a5bee2..c024d070d 100644 --- a/src/TrackedObjectBase.h +++ b/src/TrackedObjectBase.h @@ -48,6 +48,9 @@ namespace openshot { /// Keyframe to determine if a specific object label is drawn (or hidden) Keyframe draw_text; + /// Keyframe to determine if a specific object mask is drawn (when mask data exists) + Keyframe draw_mask; + /// Default constructor TrackedObjectBase(); diff --git a/src/effects/ObjectDetection.cpp b/src/effects/ObjectDetection.cpp index d3c4a4c14..cb988b791 100644 --- a/src/effects/ObjectDetection.cpp +++ b/src/effects/ObjectDetection.cpp @@ -39,15 +39,33 @@ bool is_all_objects_key(const std::string& name) } std::shared_ptr make_all_objects_properties( - const std::shared_ptr& source) + const std::shared_ptr& source, + bool has_mask_data = false) { auto properties = std::make_shared(); - if (source) + if (source) { properties->SetJsonValue(source->JsonValue()); + auto source_bbox = std::dynamic_pointer_cast(source); + has_mask_data = has_mask_data || (source_bbox && source_bbox->HasMaskData()); + } + if (has_mask_data) { + properties->AddMask(0, ObjectMaskData{1, 1, {0, 1}}); + } properties->Id("All Objects"); return properties; } +bool has_tracked_object_mask_data( + const std::map>& tracked_objects) +{ + for (const auto& tracked_object : tracked_objects) { + auto bbox = std::dynamic_pointer_cast(tracked_object.second); + if (bbox && bbox->HasMaskData()) + return true; + } + return false; +} + cv::Scalar default_class_color(const std::string& class_name, int index) { const QString normalized = QString::fromStdString(class_name).trimmed().toLower(); @@ -75,6 +93,30 @@ cv::Scalar default_class_color(const std::string& class_name, int index) }; return palette[index % (sizeof(palette) / sizeof(palette[0]))]; } + +QImage alpha_mask_image_from_rle(const ObjectMaskData& mask) +{ + QImage image(mask.width, mask.height, QImage::Format_ARGB32_Premultiplied); + image.fill(Qt::transparent); + if (!mask.HasData()) + return image; + + QRgb* data = reinterpret_cast(image.bits()); + const int total = mask.width * mask.height; + int offset = 0; + bool value = false; + for (uint32_t count : mask.rle) { + const int end = std::min(total, offset + static_cast(count)); + if (value) { + std::fill(data + offset, data + end, qRgba(255, 255, 255, 255)); + } + offset = end; + value = !value; + if (offset >= total) + break; + } + return image; +} } @@ -164,6 +206,24 @@ std::shared_ptr ObjectDetection::GetFrame(std::shared_ptr frame, i painter.drawRoundedRect(boxRect, bg_corner, bg_corner); } + ObjectMaskData object_mask = trackedObject->GetMask(frame_number, 5); + if (object_mask.HasData() && trackedObject->draw_mask.GetValue(frame_number) == 1) { + QImage mask = alpha_mask_image_from_rle(object_mask) + .scaled(frame_image->size(), Qt::IgnoreAspectRatio, Qt::SmoothTransformation); + std::vector mask_rgba = trackedObject->mask_color.GetColorRGBA(frame_number); + float mask_alpha = trackedObject->mask_alpha.GetValue(frame_number); + QColor mask_color(mask_rgba[0], mask_rgba[1], mask_rgba[2], 255 * mask_alpha); + QImage overlay(frame_image->size(), QImage::Format_ARGB32_Premultiplied); + overlay.fill(Qt::transparent); + QPainter overlay_painter(&overlay); + overlay_painter.setCompositionMode(QPainter::CompositionMode_Source); + overlay_painter.fillRect(overlay.rect(), mask_color); + overlay_painter.setCompositionMode(QPainter::CompositionMode_DestinationIn); + overlay_painter.drawImage(0, 0, mask); + overlay_painter.end(); + 
painter.drawImage(0, 0, overlay); + } + if(display_box_text.GetValue(frame_number) == 1 && trackedObject->draw_text.GetValue(frame_number) == 1) { // Draw text label above bounding box // Get the confidence and classId for the current detection @@ -236,6 +296,7 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath) std::vector confidences; std::vector> boxes; std::vector objectIds; + std::vector masks; // For each bounding box in this frame for (int di = 0; di < pbFrame.bounding_box_size(); ++di) { @@ -244,17 +305,29 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath) int classId = b.classid(); float confidence= b.confidence(); int objectId = b.objectid(); + ObjectMaskData mask; + if (b.has_mask()) { + mask.width = b.mask().width(); + mask.height = b.mask().height(); + for (int rleIndex = 0; rleIndex < b.mask().rle_size(); ++rleIndex) { + mask.rle.push_back(b.mask().rle(rleIndex)); + } + } // Record for DetectionData classIds.push_back(classId); confidences.push_back(confidence); boxes.emplace_back(x, y, w, h); objectIds.push_back(objectId); + masks.push_back(mask); // Either append to an existing TrackedObjectBBox… auto it = trackedObjects.find(objectId); if (it != trackedObjects.end()) { it->second->AddBox(frameId, x + w/2, y + h/2, w, h, 0.0); + auto bbox = std::dynamic_pointer_cast(it->second); + if (bbox && mask.HasData()) + bbox->AddMask(frameId, mask); } else { // …or create a brand-new one @@ -267,6 +340,8 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath) tmpObj.stroke_alpha = Keyframe(1.0); tmpObj.background_alpha = Keyframe(0.15); tmpObj.AddBox(frameId, x + w/2, y + h/2, w, h, 0.0); + if (mask.HasData()) + tmpObj.AddMask(frameId, mask); auto ptr = std::make_shared(tmpObj); ptr->ParentClip(this->ParentClip()); @@ -280,9 +355,9 @@ bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath) } } - // Save the DetectionData for this frame + // Save the DetectionData for this frame detectionsData[frameId] = DetectionData( - classIds, confidences, boxes, frameId, objectIds + classIds, confidences, boxes, frameId, objectIds, masks ); } @@ -406,6 +481,15 @@ std::shared_ptr ObjectDetection::TrackedObjectMask(std::shared_ptrGetMask(frame_number, 5); + if (object_mask.HasData() && tracked_object->draw_mask.GetValue(frame_number) == 1) { + QImage mask = alpha_mask_image_from_rle(object_mask) + .scaled(target_image->size(), Qt::IgnoreAspectRatio, Qt::SmoothTransformation); + painter.drawImage(0, 0, mask); + drew_any_box = true; + continue; + } + const double x = (box.cx - box.width / 2.0) * target_image->width(); const double y = (box.cy - box.height / 2.0) * target_image->height(); const double w = box.width * target_image->width(); @@ -521,7 +605,8 @@ void ObjectDetection::SetJsonValue(const Json::Value root) std::shared_ptr firstObject; if (!trackedObjects.empty()) firstObject = trackedObjects.begin()->second; - allObjectsProperties = make_all_objects_properties(firstObject); + allObjectsProperties = make_all_objects_properties( + firstObject, has_tracked_object_mask_data(trackedObjects)); } allObjectsProperties->SetJsonValue(root["objects"][name]); for (auto& trackedObject : trackedObjects) { @@ -579,7 +664,9 @@ std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const { Json::Value objects; if(selectedObjectIndex == -1 && !trackedObjects.empty()){ - auto selectedObject = allObjectsProperties ? allObjectsProperties : trackedObjects.begin()->second; + auto selectedObject = allObjectsProperties + ? 
allObjectsProperties + : make_all_objects_properties(trackedObjects.begin()->second, has_tracked_object_mask_data(trackedObjects)); if (selectedObject){ Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame); trackedObjectJSON["box_id"]["memo"] = "All Objects"; diff --git a/src/effects/ObjectDetection.h b/src/effects/ObjectDetection.h index f87786268..a9c62711c 100644 --- a/src/effects/ObjectDetection.h +++ b/src/effects/ObjectDetection.h @@ -22,6 +22,7 @@ #include "Json.h" #include "KeyFrame.h" +#include "TrackedObjectBBox.h" namespace openshot { class TrackedObjectBBox; @@ -35,19 +36,22 @@ struct DetectionData{ std::vector _confidences, std::vector> _boxes, size_t _frameId, - std::vector _objectIds) + std::vector _objectIds, + std::vector _masks = {}) { classIds = _classIds; confidences = _confidences; boxes = _boxes; frameId = _frameId; objectIds = _objectIds; + masks = _masks; } size_t frameId; std::vector classIds; std::vector confidences; std::vector> boxes; std::vector objectIds; + std::vector masks; }; namespace openshot diff --git a/src/objdetectdata.proto b/src/objdetectdata.proto index 057a63f4a..2c01d5665 100644 --- a/src/objdetectdata.proto +++ b/src/objdetectdata.proto @@ -21,6 +21,14 @@ message Frame { int32 classId = 5; float confidence = 6; int32 objectId = 7; + + message Mask { + int32 width = 1; + int32 height = 2; + repeated uint32 rle = 3; + } + + Mask mask = 8; } repeated Box bounding_box = 2; diff --git a/tests/EffectMask.cpp b/tests/EffectMask.cpp index f9d2e6243..a6f90829d 100644 --- a/tests/EffectMask.cpp +++ b/tests/EffectMask.cpp @@ -8,7 +8,10 @@ // SPDX-License-Identifier: LGPL-3.0-or-later #include +#include +#include #include +#include #include #include #include @@ -20,6 +23,7 @@ #include "CacheMemory.h" #include "Clip.h" +#include "CVObjectDetection.h" #include "DummyReader.h" #include "effects/Blur.h" #include "effects/Brightness.h" @@ -30,6 +34,7 @@ #include "effects/Sharpen.h" #include "effects/Tracker.h" #include "TrackedObjectBBox.h" +#include "ProcessingController.h" #include "QtImageReader.h" #include "openshot_catch.h" @@ -42,6 +47,33 @@ static std::string temp_png_path(const std::string& base) { return path.str(); } +TEST_CASE("CVObjectDetection normalizes saved track classes", "[libopenshot][opencv][objectdetection]") +{ + const std::string protobuf_path = temp_png_path("objdetector_class_normalize") + ".data"; + const std::string effect_info = "{\"protobuf_data_path\": \"" + protobuf_path + "\"}"; + ProcessingController controller; + CVObjectDetection writer(effect_info, controller); + + for (size_t frame = 1; frame <= 3; ++frame) { + writer.detectionsData[frame] = CVDetectionData( + {5}, {0.90f}, {cv::Rect_(0.1f, 0.1f, 0.2f, 0.2f)}, frame, {1}); + } + for (size_t frame = 4; frame <= 13; ++frame) { + writer.detectionsData[frame] = CVDetectionData( + {2}, {0.40f}, {cv::Rect_(0.1f, 0.1f, 0.2f, 0.2f)}, frame, {1}); + } + + REQUIRE(writer.SaveObjDetectedData()); + + CVObjectDetection reader(effect_info, controller); + REQUIRE(reader._LoadObjDetectdData()); + + CHECK(reader.GetDetectionData(1).classIds.at(0) == 2); + CHECK(reader.GetDetectionData(13).classIds.at(0) == 2); + + std::remove(protobuf_path.c_str()); +} + static std::string create_source_png(int w, int h, const QColor& color) { const std::string path = temp_png_path("source"); QImage image(w, h, QImage::Format_RGBA8888_Premultiplied); @@ -132,6 +164,72 @@ static std::shared_ptr make_input_frame(int64_t number, int width = 2, in return frame; } +static void 
append_varint(std::string& output, uint32_t value) { + while (value > 0x7f) { + output.push_back(static_cast((value & 0x7f) | 0x80)); + value >>= 7; + } + output.push_back(static_cast(value)); +} + +static void append_fixed32_float(std::string& output, float value) { + uint32_t bits = 0; + static_assert(sizeof(bits) == sizeof(value), "Unexpected float size"); + std::memcpy(&bits, &value, sizeof(bits)); + for (int i = 0; i < 4; ++i) + output.push_back(static_cast((bits >> (8 * i)) & 0xff)); +} + +static void append_length_delimited(std::string& output, uint32_t field_number, const std::string& value) { + append_varint(output, (field_number << 3) | 2); + append_varint(output, static_cast(value.size())); + output.append(value); +} + +static std::string create_object_detection_data_with_mask() { + const std::string path = temp_png_path("object_detection_mask") + ".data"; + std::string mask; + append_varint(mask, 8); + append_varint(mask, 2); + append_varint(mask, 16); + append_varint(mask, 2); + for (uint32_t count : {0u, 1u, 3u}) { + append_varint(mask, 24); + append_varint(mask, count); + } + + std::string box; + append_varint(box, 13); + append_fixed32_float(box, 0.0f); + append_varint(box, 21); + append_fixed32_float(box, 0.0f); + append_varint(box, 29); + append_fixed32_float(box, 1.0f); + append_varint(box, 37); + append_fixed32_float(box, 1.0f); + append_varint(box, 40); + append_varint(box, 0); + append_varint(box, 53); + append_fixed32_float(box, 0.95f); + append_varint(box, 56); + append_varint(box, 1); + append_length_delimited(box, 8, mask); + + std::string frame; + append_varint(frame, 8); + append_varint(frame, 1); + append_length_delimited(frame, 2, box); + + std::string data; + append_length_delimited(data, 1, frame); + append_length_delimited(data, 3, "person"); + + std::ofstream output(path, std::ios::out | std::ios::binary); + output.write(data.data(), static_cast(data.size())); + REQUIRE(output.good()); + return path; +} + TEST_CASE("EffectBase common mask blend applies to ProcessFrame", "[effect][mask][base]") { auto frame = std::make_shared(1, 4, 1, "#000000"); auto image = frame->GetImage(); @@ -176,6 +274,109 @@ TEST_CASE("ObjectDetection all object update applies sparse style keys", "[effec CHECK(second->background.red.GetValue(1) == Approx(200.0)); } +TEST_CASE("ObjectDetection object mask overlay does not fill the whole frame", "[effect][object_detection][mask]") { + ObjectDetection effect; + Clip parent_clip; + effect.ParentClip(&parent_clip); + const std::string protobuf_path = create_object_detection_data_with_mask(); + + Json::Value config; + config["protobuf_data_path"] = protobuf_path; + config["display_boxes"] = Keyframe(0.0).JsonValue(); + config["display_box_text"] = Keyframe(0.0).JsonValue(); + effect.SetJsonValue(config); + + const Json::Value properties = stringToJson(effect.PropertiesJSON(1)); + REQUIRE(properties["objects"].isMember("all")); + REQUIRE(properties["objects"]["all"].isMember("draw_mask")); + CHECK(properties["objects"]["all"]["draw_mask"]["name"].asString() == "Draw Mask"); + CHECK(properties["objects"]["all"]["draw_mask"]["value"].asBool()); + REQUIRE(properties["objects"]["all"].isMember("mask_color")); + CHECK(properties["objects"]["all"]["mask_color"]["name"].asString() == "Mask Color"); + REQUIRE(properties["objects"]["all"].isMember("mask_alpha")); + CHECK(properties["objects"]["all"]["mask_alpha"]["name"].asString() == "Mask Alpha"); + CHECK(properties["objects"]["all"]["mask_alpha"]["value"].asDouble() == Approx(120.0 / 255.0)); + 
+ auto frame = make_input_frame(1, 4, 4); + auto output = effect.GetFrame(frame, 1)->GetImage(); + + CHECK(output->pixelColor(0, 0) != QColor(64, 64, 64, 255)); + CHECK(output->pixelColor(3, 3) == QColor(64, 64, 64, 255)); + + std::remove(protobuf_path.c_str()); +} + +TEST_CASE("ObjectDetection all object mask controls survive style updates", "[effect][object_detection][mask]") { + ObjectDetection effect; + const std::string protobuf_path = create_object_detection_data_with_mask(); + + Json::Value config; + config["protobuf_data_path"] = protobuf_path; + effect.SetJsonValue(config); + + Json::Value update; + update["objects"]["all"]["draw_mask"] = Keyframe(0.0).JsonValue(); + update["objects"]["all"]["mask_alpha"] = Keyframe(0.33).JsonValue(); + effect.SetJsonValue(update); + + Json::Value properties = stringToJson(effect.PropertiesJSON(1)); + REQUIRE(properties["objects"].isMember("all")); + CHECK(properties["objects"]["all"].isMember("draw_mask")); + CHECK(properties["objects"]["all"].isMember("mask_color")); + CHECK(properties["objects"]["all"].isMember("mask_alpha")); + CHECK(properties["objects"]["all"]["draw_mask"]["value"].asBool() == false); + CHECK(properties["objects"]["all"]["mask_alpha"]["value"].asDouble() == Approx(0.33)); + + std::remove(protobuf_path.c_str()); +} + +TEST_CASE("ObjectDetection all object mask controls use any masked object", "[effect][object_detection][mask]") { + ObjectDetection effect; + auto first = std::make_shared(10, 20, 30, 255); + auto second = std::make_shared(200, 210, 220, 255); + first->AddBox(1, 0.25f, 0.25f, 0.2f, 0.2f, 0.0f); + second->AddBox(1, 0.75f, 0.75f, 0.2f, 0.2f, 0.0f); + + ObjectMaskData mask; + mask.width = 2; + mask.height = 2; + mask.rle = {0, 1, 3}; + second->AddMask(1, mask); + + effect.trackedObjects[1] = first; + effect.trackedObjects[2] = second; + effect.selectedObjectIndex = -1; + + Json::Value properties = stringToJson(effect.PropertiesJSON(1)); + REQUIRE(properties["objects"].isMember("all")); + CHECK(properties["objects"]["all"].isMember("draw_mask")); + CHECK(properties["objects"]["all"].isMember("mask_color")); + CHECK(properties["objects"]["all"].isMember("mask_alpha")); +} + +TEST_CASE("TrackedObjectBBox reuses nearby masks for short detection gaps", "[effect][object_detection][mask]") { + TrackedObjectBBox tracked; + tracked.SetBaseFPS(Fraction(30, 1)); + tracked.AddBox(10, 0.25f, 0.25f, 0.2f, 0.2f, 0.0f); + tracked.AddBox(11, 0.29f, 0.25f, 0.2f, 0.2f, 0.0f); + tracked.AddBox(15, 0.45f, 0.25f, 0.2f, 0.2f, 0.0f); + tracked.AddBox(16, 0.49f, 0.25f, 0.2f, 0.2f, 0.0f); + + ObjectMaskData mask; + mask.width = 10; + mask.height = 10; + mask.rle = {22, 1, 77}; + tracked.AddMask(10, mask); + + CHECK(tracked.HasMask(10)); + CHECK_FALSE(tracked.HasMask(11)); + CHECK(tracked.GetMask(11, 5).HasData()); + CHECK(tracked.GetMask(15, 5).HasData()); + CHECK(tracked.GetMask(15, 5).rle != mask.rle); + CHECK_FALSE(tracked.GetMask(16, 5).HasData()); + CHECK(tracked.HasMaskData()); +} + TEST_CASE("ObjectDetection individual style overrides all object style", "[effect][object_detection]") { ObjectDetection effect; auto first = std::make_shared(10, 20, 30, 255);