
Commit b455b12

xnnpack-bot authored and Maratyszcza committed
Initial open-source release
PiperOrigin-RevId: 271685289
0 parents · commit b455b12

660 files changed: +258,289 −0 lines changed


CONTRIBUTING.md

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
# How to Contribute

We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.

## Contributor License Agreement

Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.

You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.

## Code reviews

All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.

## Community Guidelines

This project follows [Google's Open Source Community
Guidelines](https://opensource.google.com/conduct/).

LICENSE

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
BSD License

For XNNPACK software

Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
Copyright 2019 Google LLC

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name Facebook nor the names of its contributors may be used to
  endorse or promote products derived from this software without specific
  prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.md

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
# XNNPACK

XNNPACK is a highly optimized library of floating-point neural network inference operators for ARM, WebAssembly, and x86 (SSE2 level) platforms. XNNPACK is not intended for direct use by deep learning practitioners and researchers; instead it provides low-level performance primitives for accelerating high-level machine learning frameworks, such as [MediaPipe](https://mediapipe.dev), [TensorFlow Lite](https://www.tensorflow.org/lite), and [TensorFlow.js](https://www.tensorflow.org/js).

## Supported Architectures

- ARM on Android, Linux, and iOS
- ARM64 on Android, Linux, and iOS
- WebAssembly MVP
- WebAssembly SIMD (experimental)
- x86 and x86-64 (up to SSE2 only) on Android, Linux, and Mac

## Operator Coverage

XNNPACK implements the following neural network operators:

- 2D Convolution (including grouped and depthwise)
- 2D Deconvolution (AKA Transposed Convolution)
- 2D Average Pooling
- 2D Max Pooling
- 2D ArgMax Pooling (Max Pooling + indices)
- 2D Unpooling
- Add (tensors of same shape)
- Global Average Pooling
- Channel Shuffle
- Clamp (includes ReLU and ReLU6)
- HardSwish
- PReLU

All operators in XNNPACK support NHWC layout, but additionally allow a custom stride along the **C**hannel dimension. Thus, operators can consume a subset of channels in the input tensor, and produce a subset of channels in the output tensor, providing zero-cost Channel Split and Channel Concatenation operations.
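For example, a channel-split Add can be expressed purely through strides: the sketch below consumes only the first 128 channels of two 256-channel tensors and writes the first 128 channels of a 256-channel output. This is a minimal sketch, not part of the original README; the batch size, channel counts, and quantization parameters are illustrative assumptions, and the Q8 Add API is the one exercised in `bench/add.cc` in this same commit.

```c++
#include <cstddef>
#include <cstdint>
#include <vector>

#include <xnnpack.h>

int main() {
  const size_t batch_size = 4;        // illustrative values
  const size_t full_channels = 256;   // channels physically stored per row
  const size_t split_channels = 128;  // channels this operator consumes

  std::vector<uint8_t> a(batch_size * full_channels);
  std::vector<uint8_t> b(batch_size * full_channels);
  std::vector<uint8_t> y(batch_size * full_channels);

  if (xnn_initialize() != xnn_status_success) return 1;

  // The operator touches only the first split_channels of each row; strides
  // equal to full_channels skip over the remaining channels at zero cost.
  xnn_operator_t add_op = nullptr;
  xnn_status status = xnn_create_add_nc_q8(
      split_channels,
      full_channels /* a_stride */, full_channels /* b_stride */,
      full_channels /* sum_stride */,
      127 /* a:zero point */, 1.0f /* a:scale */,
      127 /* b:zero point */, 1.0f /* b:scale */,
      127 /* y:zero point */, 1.0f /* y:scale */,
      1 /* y:min */, 254 /* y:max */,
      0 /* flags */, &add_op);
  if (status != xnn_status_success) return 1;

  status = xnn_setup_add_nc_q8(
      add_op, batch_size, a.data(), b.data(), y.data(),
      nullptr /* thread pool */);
  if (status != xnn_status_success) return 1;

  status = xnn_run_operator(add_op, nullptr /* thread pool */);
  if (status != xnn_status_success) return 1;

  xnn_delete_operator(add_op);
  return 0;
}
```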
## Acknowledgements

XNNPACK is based on the [QNNPACK](https://github.com/pytorch/QNNPACK) library. However, unlike QNNPACK, XNNPACK focuses entirely on floating-point operators, and its API is no longer compatible with QNNPACK.

bench/add.cc

Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>


static void add_nc_q8(benchmark::State& state) {
  const size_t batch_size = static_cast<size_t>(state.range(0));
  const size_t channels = static_cast<size_t>(state.range(1));

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

  std::vector<uint8_t> a(batch_size * channels);
  std::vector<uint8_t> b(batch_size * channels);
  std::vector<uint8_t> y(batch_size * channels);
  std::generate(a.begin(), a.end(), std::ref(u8rng));
  std::generate(b.begin(), b.end(), std::ref(u8rng));

  xnn_status status = xnn_initialize();
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  // With unit scales and zero points of 127 on both inputs and the output,
  // the quantized addition reduces to y = clamp(a + b - 127, 1, 254).
  xnn_operator_t add_op = nullptr;
  status = xnn_create_add_nc_q8(
    channels, channels /* a_stride */, channels /* b_stride */, channels /* sum_stride */,
    127 /* a:zero point */, 1.0f /* a:scale */,
    127 /* b:zero point */, 1.0f /* b:scale */,
    127 /* y:zero point */, 1.0f /* y:scale */,
    1 /* y:min */, 254 /* y:max */,
    0 /* flags */, &add_op);
  if (status != xnn_status_success || add_op == nullptr) {
    state.SkipWithError("failed to create Q8 Add operator");
    return;
  }

  status = xnn_setup_add_nc_q8(
    add_op,
    batch_size,
    a.data(), b.data(), y.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Q8 Add operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(add_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Q8 Add operator");
      return;
    }
  }

  status = xnn_delete_operator(add_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Q8 Add operator");
    return;
  }

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  // Two input streams plus one output stream of uint8_t per element.
  const size_t bytes_per_iteration = 3 * elements_per_iteration * sizeof(uint8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}

// Same benchmark, but the second input aliases the output (in-place addition).
static void add_nc_q8_inplace(benchmark::State& state) {
  const size_t batch_size = static_cast<size_t>(state.range(0));
  const size_t channels = static_cast<size_t>(state.range(1));

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);

  std::vector<uint8_t> a(batch_size * channels);
  std::vector<uint8_t> y(batch_size * channels);
  std::generate(a.begin(), a.end(), std::ref(u8rng));

  xnn_status status = xnn_initialize();
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t add_op = nullptr;
  status = xnn_create_add_nc_q8(
    channels, channels /* a_stride */, channels /* b_stride */, channels /* sum_stride */,
    127 /* a:zero point */, 1.0f /* a:scale */,
    127 /* b:zero point */, 1.0f /* b:scale */,
    127 /* y:zero point */, 1.0f /* y:scale */,
    1 /* y:min */, 254 /* y:max */,
    0 /* flags */, &add_op);
  if (status != xnn_status_success || add_op == nullptr) {
    state.SkipWithError("failed to create Q8 Add operator");
    return;
  }

  status = xnn_setup_add_nc_q8(
    add_op,
    batch_size,
    a.data(), y.data(), y.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Q8 Add operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(add_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Q8 Add operator");
      return;
    }
  }

  status = xnn_delete_operator(add_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Q8 Add operator");
    return;
  }

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 3 * elements_per_iteration * sizeof(uint8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}

// Sweep characteristic CNN shapes: the spatial extent halves from 224x224
// down to 7x7 while the channel count doubles from 16, so the batch argument
// N = H*W shrinks as C grows.
static void CharacteristicArguments(benchmark::internal::Benchmark* b)
{
  b->ArgNames({"N", "C"});

  int32_t c = 16;
  for (int32_t n = 224; n >= 7; n /= 2) {
    b->Args({n * n, c});
    c *= 2;
  }
}

BENCHMARK(add_nc_q8)->Apply(CharacteristicArguments)->UseRealTime();
BENCHMARK(add_nc_q8_inplace)->Apply(CharacteristicArguments)->UseRealTime();

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
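As a usage note, the trailing `#ifndef XNNPACK_BENCHMARK_NO_MAIN` guard lets this file either run standalone or be linked with other benchmark files into one binary. A minimal sketch of the combined case, assuming a hypothetical aggregator file (not part of this commit) and that every bench/*.cc is compiled with `-DXNNPACK_BENCHMARK_NO_MAIN`:

```c++
// benchmark_main.cc (hypothetical aggregator): the only translation unit
// that expands BENCHMARK_MAIN(); the guarded entry points in the individual
// bench/*.cc files drop out under -DXNNPACK_BENCHMARK_NO_MAIN.
#include <benchmark/benchmark.h>

BENCHMARK_MAIN();
```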
