diff --git a/src/models/kv_cache.cpp b/src/models/kv_cache.cpp index 957a7aba0..8ebe0a2a4 100644 --- a/src/models/kv_cache.cpp +++ b/src/models/kv_cache.cpp @@ -187,7 +187,10 @@ DefaultKeyValueCache::DefaultKeyValueCache(State& state) sb_kv_caches_.empty() ? OrtValue::CreateTensor(Allocator(), shape_, type_) : sb_kv_caches_[i]->CreateTensorOnStaticBuffer(shape_, type_)); // Zero the memory so we don't leak any data from the previous run - ByteWrapTensor(Device(), *presents_.back()).Zero(); + // WebGPU device has no Zero() implementation yet. Since this zeroing is optional we disable it for WebGPU for now + if (Device().GetType() != DeviceType::WEBGPU) { + ByteWrapTensor(Device(), *presents_.back()).Zero(); + } } } catch (const Ort::Exception&) { std::ostringstream oss;