diff --git a/modules/cudafilters/include/opencv2/cudafilters.hpp b/modules/cudafilters/include/opencv2/cudafilters.hpp index 2aa9c846462..d92bdde2caa 100644 --- a/modules/cudafilters/include/opencv2/cudafilters.hpp +++ b/modules/cudafilters/include/opencv2/cudafilters.hpp @@ -97,6 +97,9 @@ center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa boxFilter */ CV_EXPORTS_W Ptr createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1, -1), @@ -115,6 +118,9 @@ center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa filter2D */ CV_EXPORTS_W Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1, -1), @@ -134,6 +140,9 @@ applied (see getDerivKernels ). @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa Laplacian */ CV_EXPORTS_W Ptr createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, @@ -156,6 +165,9 @@ the aperture center. borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. 
+@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa sepFilter2D */ CV_EXPORTS_W Ptr createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, @@ -178,6 +190,9 @@ applied. For details, see getDerivKernels . @param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize = false, double scale = 1, @@ -196,6 +211,9 @@ applied. For details, see getDerivKernels . borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa Sobel */ CV_EXPORTS_W Ptr createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3, @@ -213,6 +231,9 @@ applied. See getDerivKernels for details. borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. 
+ @sa Scharr */ CV_EXPORTS_W Ptr createScharrFilter(int srcType, int dstType, int dx, int dy, @@ -233,6 +254,9 @@ CV_EXPORTS_W Ptr createScharrFilter(int srcType, int dstType, int dx, in borderInterpolate. @param columnBorderMode Pixel extrapolation method in the horizontal direction. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa GaussianBlur */ CV_EXPORTS_W Ptr createGaussianFilter(int srcType, int dstType, Size ksize, @@ -258,6 +282,9 @@ CV_EXPORTS_W Ptr createGaussianFilter(int srcType, int dstType, Size ksi is at the center. @param iterations Number of times erosion and dilation to be applied. +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. + @sa morphologyEx */ CV_EXPORTS_W Ptr createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1); @@ -272,6 +299,9 @@ CV_EXPORTS_W Ptr createMorphologyFilter(int op, int srcType, InputArray @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createBoxMaxFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), @@ -284,6 +314,9 @@ CV_EXPORTS_W Ptr createBoxMaxFilter(int srcType, Size ksize, @param anchor Anchor point. 
The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), @@ -300,6 +333,9 @@ CV_EXPORTS_W Ptr createBoxMinFilter(int srcType, Size ksize, @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); @@ -311,6 +347,9 @@ CV_EXPORTS_W Ptr createRowSumFilter(int srcType, int dstType, int ksize, @param anchor Anchor point. The default value (-1) means that the anchor is at the kernel center. @param borderMode Pixel extrapolation method. For details, see borderInterpolate . @param borderVal Default border value. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. 
*/ CV_EXPORTS_W Ptr createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); @@ -329,6 +368,9 @@ Outputs an image that has been filtered using a median-filtering formulation. Details on this algorithm can be found in: Green, O., 2017. "Efficient scalable median filtering using histogram-based operations", IEEE Transactions on Image Processing, 27(5), pp.2217-2228. + +@note +If applied in a CUDA Stream, a distinct filter instance must be created for each Stream. Sharing a single instance across multiple streams is unsupported and may lead to undefined behavior due to stream-specific internal state. */ CV_EXPORTS_W Ptr createMedianFilter(int srcType, int windowSize, int partition = 128); diff --git a/modules/cudawarping/include/opencv2/cudawarping.hpp b/modules/cudawarping/include/opencv2/cudawarping.hpp index b9ca957358e..6c920fa8c24 100644 --- a/modules/cudawarping/include/opencv2/cudawarping.hpp +++ b/modules/cudawarping/include/opencv2/cudawarping.hpp @@ -118,6 +118,7 @@ CV_EXPORTS_W void resize(InputArray src, OutputArray dst, Size dsize, double fx= @param src Source image. CV_8U , CV_16U , CV_32S , or CV_32F depth and 1, 3, or 4 channels are supported. @param dst Destination image with the same type as src . The size is dsize . + **In-place operation (src == dst) is not supported and will result in an error.** @param M *2x3* Mat or UMat transformation matrix. @param dsize Size of the destination image. @param flags Combination of interpolation methods (see resize) and the optional flag @@ -127,6 +128,7 @@ INTER_NEAREST , INTER_LINEAR , and INTER_CUBIC interpolation methods are support @param borderValue @param stream Stream for the asynchronous version. +@note In-place operation is not supported. If src and dst refer to the same data, an error is raised. 
@sa warpAffine */ CV_EXPORTS void warpAffine(InputArray src, OutputArray dst, InputArray M, Size dsize, int flags = INTER_LINEAR, diff --git a/modules/cudawarping/src/warp.cpp b/modules/cudawarping/src/warp.cpp index 8690f54085d..839b786ce45 100644 --- a/modules/cudawarping/src/warp.cpp +++ b/modules/cudawarping/src/warp.cpp @@ -208,6 +208,8 @@ void cv::cuda::warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size _dst.create(dsize, src.type()); GpuMat dst = _dst.getGpuMat(); + CV_Assert( src.data != dst.data && "In-place operation not supported for cv::cuda::warpAffine" ); + Size wholeSize; Point ofs; src.locateROI(wholeSize, ofs); diff --git a/modules/cudawarping/test/test_warp_affine.cpp b/modules/cudawarping/test/test_warp_affine.cpp index d26a5fdeb7c..ebbba914ced 100644 --- a/modules/cudawarping/test/test_warp_affine.cpp +++ b/modules/cudawarping/test/test_warp_affine.cpp @@ -222,6 +222,36 @@ CUDA_TEST_P(WarpAffine, Accuracy) EXPECT_MAT_NEAR(dst_gold, dst, src.depth() == CV_32F ? 
1e-1 : 1.0); } +CUDA_TEST_P(WarpAffine, OverlapDetection) +{ + cv::Mat src = randomMat(size, type); + ASSERT_FALSE(src.empty()); + cv::cuda::GpuMat gpuSrc; + gpuSrc.upload(src); + + cv::Mat M = cv::Mat::eye(2, 3, CV_64FC1); + int flags = interpolation; + if (inverse) + flags |= cv::WARP_INVERSE_MAP; + + { + cv::cuda::GpuMat gpuDst(gpuSrc, cv::Rect(0, 0, size.width, size.height)); + + EXPECT_THROW( + cv::cuda::warpAffine(gpuSrc, gpuDst, M, size, flags, borderType, cv::Scalar::all(0)), + cv::Exception); + } + + { + cv::cuda::GpuMat gpuDst(size, gpuSrc.type()); + ASSERT_NE(gpuSrc.data, gpuDst.data); // Confirm they are distinct + + EXPECT_NO_THROW({ + cv::cuda::warpAffine(gpuSrc, gpuDst, M, size, flags, borderType, cv::Scalar::all(0)); + }); + } +} + INSTANTIATE_TEST_CASE_P(CUDA_Warping, WarpAffine, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, diff --git a/modules/gapi/include/opencv2/gapi/infer/ov.hpp b/modules/gapi/include/opencv2/gapi/infer/ov.hpp index 9515744cac4..3673ff53a28 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ov.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ov.hpp @@ -66,6 +66,8 @@ struct ParamDesc { LayerVariantAttr> scale_values; LayerVariantAttr interpolation; + + bool clamp_outputs = false; }; struct CompiledModel { @@ -356,6 +358,24 @@ template struct Params { return *this; } + /** @brief Enables or disables clamping of model outputs in the PrePostProcessor. + + By default, output values are clamped to the valid range for the output precision + by the device or plugin. Enabling this option moves clamping to the PrePostProcessor stage. + + @note This feature is only available with OpenVINO 2025.2 and newer. + + @param flag If true, clamping is performed in the PrePostProcessor; + otherwise, it is handled by the device or plugin. + @return reference to this parameter structure. 
+ */ + Params& + cfgClampOutputs(bool flag = true) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "clamp outputs") + .clamp_outputs = flag; + return *this; + } + /** @brief Specifies the new shape for input layers. The function is used to set new shape for input layers. @@ -625,6 +645,14 @@ class Params { return *this; } + /** @see ov::Params::cfgClampOutputs. */ + Params& + cfgClampOutputs(bool flag = true) { + detail::getModelToSetAttrOrThrow(m_desc.kind, "clamp outputs") + .clamp_outputs = flag; + return *this; + } + /** @see ov::Params::cfgReshape. */ Params& cfgReshape(std::vector new_shape) { detail::getModelToSetAttrOrThrow(m_desc.kind, "reshape") diff --git a/modules/gapi/src/backends/ov/govbackend.cpp b/modules/gapi/src/backends/ov/govbackend.cpp index dbaba382db9..4ea1c1cc0f3 100644 --- a/modules/gapi/src/backends/ov/govbackend.cpp +++ b/modules/gapi/src/backends/ov/govbackend.cpp @@ -147,6 +147,25 @@ static int toCV(const ov::element::Type &type) { return -1; } +static inline std::pair get_CV_type_range(int cv_type) { + switch (cv_type) { + case CV_8U: + return { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max()) }; + case CV_32S: + return { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max()) }; + case CV_32F: + return { static_cast(std::numeric_limits::lowest()), + static_cast(std::numeric_limits::max()) }; + case CV_16F: + return { -65504.0, 65504.0 }; + default: + GAPI_Error("OV Backend: Unsupported data type"); + } + return {0.0, 0.0}; +} + static void copyFromOV(const ov::Tensor &tensor, cv::Mat &mat) { const auto total = mat.total() * mat.channels(); if (toCV(tensor.get_element_type()) != mat.depth() || @@ -1052,6 +1071,20 @@ class PrePostProcWrapper { if (explicit_out_tensor_prec) { m_ppp.output(output_name).tensor() .set_element_type(toOV(*explicit_out_tensor_prec)); + + if (m_model_info.clamp_outputs) { + #if INF_ENGINE_RELEASE >= 2025020000 + auto 
clamp_range = get_CV_type_range(*explicit_out_tensor_prec); + m_ppp.output(output_name).postprocess() + .clamp(clamp_range.first, clamp_range.second); + #else + static bool warned = false; + if (!warned) { + GAPI_LOG_WARNING(NULL, "cfgClampOutputs is enabled, but not supported in this OpenVINO version. Clamping will be ignored."); + warned = true; + } + #endif // INF_ENGINE_RELEASE >= 2025020000 + } } } } diff --git a/modules/videostab/src/cuda/global_motion.cu b/modules/videostab/src/cuda/global_motion.cu index 7eca6ff76b7..c20ccfc2ed4 100644 --- a/modules/videostab/src/cuda/global_motion.cu +++ b/modules/videostab/src/cuda/global_motion.cu @@ -52,6 +52,11 @@ namespace cv { namespace cuda { namespace device { namespace globmotion { __constant__ float cml[9]; __constant__ float cmr[9]; +struct is_zero +{ + __host__ __device__ bool operator()(uchar x) const { return x == 0; } +}; + int compactPoints(int N, float *points0, float *points1, const uchar *mask) { thrust::device_ptr dpoints0((float2*)points0); @@ -60,7 +65,7 @@ int compactPoints(int N, float *points0, float *points1, const uchar *mask) return (int)(thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dpoints0, dpoints1)), thrust::make_zip_iterator(thrust::make_tuple(dpoints0 + N, dpoints1 + N)), - dmask, thrust::not1(thrust::identity())) + dmask, is_zero()) - thrust::make_zip_iterator(make_tuple(dpoints0, dpoints1))); }