diff --git a/c10/xpu/XPUCachingAllocator.cpp b/c10/xpu/XPUCachingAllocator.cpp
index 7e534c7aa403b..ca2ebd7292df6 100644
--- a/c10/xpu/XPUCachingAllocator.cpp
+++ b/c10/xpu/XPUCachingAllocator.cpp
@@ -900,6 +900,16 @@ class DeviceCachingAllocator {
     }
     bool active_pool =
         p.pool->owner_PrivatePool && p.pool->owner_PrivatePool->allocator();
+
+    const auto& raw_device = c10::xpu::get_raw_device(p.device());
+    if (!active_pool && raw_device.has(sycl::aspect::ext_intel_free_memory)) {
+      const size_t device_free =
+          raw_device.get_info<sycl::ext::intel::info::device::free_memory>();
+      if (size > device_free) {
+        return false;
+      }
+    }
+
     if (set_fraction &&
         stats.reserved_bytes[static_cast<size_t>(StatType::AGGREGATE)].current +
                 size >
diff --git a/test/test_xpu.py b/test/test_xpu.py
index c0f03b8247b46..02fd2e102cf28 100644
--- a/test/test_xpu.py
+++ b/test/test_xpu.py
@@ -566,6 +566,26 @@ def test_raises_oom(self):
         with self.assertRaises(torch.OutOfMemoryError):
             torch.empty(1024 * 1024 * 1024 * 1024, device="xpu")
 
+    @serialTest()
+    @unittest.skipIf(
+        int(torch.version.xpu) < 20250000,
+        "Test requires SYCL compiler version 2025.0.0 or newer.",
+    )
+    def test_allocation_raises_oom(self):
+        gc.collect()
+        torch.xpu.empty_cache()
+        free_bytes, _ = torch.xpu.mem_get_info()
+        alloc_bytes = free_bytes * 5 // 9
+        tensor = None
+        try:
+            tensor = torch.empty(alloc_bytes, dtype=torch.bool, device="xpu")
+            with self.assertRaises(torch.OutOfMemoryError):
+                tensor.clone()
+        finally:
+            del tensor
+            gc.collect()
+            torch.xpu.empty_cache()
+
     @serialTest()
     def test_1mb_allocation_uses_small_block(self):
         gc.collect()