// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

#include <xnnpack.h>

#include <gtest/gtest.h>

inline size_t reference_index(
    const size_t* input_stride,
    const size_t* output_stride,
    const size_t* perm,
    const size_t num_dims,
    size_t pos)
{
  size_t in_pos = 0;
  for (size_t j = 0; j < num_dims; ++j) {
    const size_t idx = pos / output_stride[j];
    pos = pos % output_stride[j];
    in_pos += idx * input_stride[perm[j]];
  }
  return in_pos;
}
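
// A worked example of the index mapping above (illustrative values): for a
// 2x3 input transposed with perm = {1, 0}, input_stride = {3, 1} and the
// 3x2 output has output_stride = {2, 1}. Output position 3 splits as
// 3 = 1 * output_stride[0] + 1 * output_stride[1], i.e. output coordinate
// (1, 1), which maps back to input offset
// 1 * input_stride[perm[0]] + 1 * input_stride[perm[1]] = 1 * 1 + 1 * 3 = 4.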

class TransposeOperatorTester {
 public:
  inline TransposeOperatorTester& num_dims(size_t num_dims) {
    assert(num_dims != 0);
    this->num_dims_ = num_dims;
    return *this;
  }

  inline size_t num_dims() const { return this->num_dims_; }

  inline TransposeOperatorTester& shape(std::vector<size_t> shape) {
    assert(shape.size() <= XNN_MAX_TENSOR_DIMS);
    this->shape_ = shape;
    return *this;
  }

  inline const std::vector<size_t>& dims() const { return this->shape_; }

  inline TransposeOperatorTester& perm(std::vector<size_t> perm) {
    assert(perm.size() <= XNN_MAX_TENSOR_DIMS);
    this->perm_ = perm;
    return *this;
  }

  inline const std::vector<size_t>& perm() const { return this->perm_; }
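
  // Note: the tests below assume that shape() and perm() each hold
  // num_dims() entries and that perm() is a permutation of [0, num_dims()).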
  void TestX8() const {
    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), size_t(1), std::multiplies<size_t>());
    std::vector<uint8_t> input(count + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(count);
    // Compute row-major strides of the input shape and of the permuted output shape.
    std::vector<size_t> input_stride(input.size(), 1);
    std::vector<size_t> output_stride(input.size(), 1);
    for (size_t i = num_dims() - 1; i > 0; --i) {
      input_stride[i - 1] = input_stride[i] * shape_[i];
      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
    }
    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
    xnn_operator_t transpose_op = nullptr;
    std::iota(input.begin(), input.end(), 0);
    std::fill(output.begin(), output.end(), UINT8_C(0xA5));

    ASSERT_EQ(xnn_status_success,
              xnn_create_transpose_nd_x8(0 /* flags */, &transpose_op));
    ASSERT_NE(nullptr, transpose_op);

    // Smart pointer to automatically delete transpose op.
    std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_transpose_op(transpose_op, xnn_delete_operator);

    ASSERT_EQ(xnn_status_success,
              xnn_setup_transpose_nd_x8(
                  transpose_op,
                  input.data(), output.data(),
                  num_dims(), shape_.data(), perm_.data(),
                  nullptr /* thread pool */));

    // Run operator.
    ASSERT_EQ(xnn_status_success,
              xnn_run_operator(transpose_op, nullptr /* thread pool */));

    // Verify results.
    for (size_t i = 0; i < count; ++i) {
      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
      ASSERT_EQ(input[in_idx], output[i]);
    }
  }
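
  // TestRunX8 exercises the single-shot eager API, which performs setup and
  // execution in one call, without an explicitly managed operator object, in
  // contrast to TestX8 above, which drives the create/setup/run lifecycle
  // explicitly.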
  void TestRunX8() const {
    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), size_t(1), std::multiplies<size_t>());
    std::vector<uint8_t> input(count + XNN_EXTRA_BYTES / sizeof(uint8_t));
    std::vector<uint8_t> output(count);
    std::vector<size_t> input_stride(input.size(), 1);
    std::vector<size_t> output_stride(input.size(), 1);
    for (size_t i = num_dims() - 1; i > 0; --i) {
      input_stride[i - 1] = input_stride[i] * shape_[i];
      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
    }
    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
    std::iota(input.begin(), input.end(), 0);
    std::fill(output.begin(), output.end(), UINT8_C(0xA5));

    // Call the transpose eager API.
    ASSERT_EQ(xnn_status_success,
              xnn_run_transpose_nd_x8(
                  0 /* flags */,
                  input.data(), output.data(),
                  num_dims(), shape_.data(), perm_.data(),
                  nullptr /* thread pool */));

    // Verify results.
    for (size_t i = 0; i < count; ++i) {
      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
      ASSERT_EQ(input[in_idx], output[i]);
    }
  }

  void TestX16() const {
    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), size_t(1), std::multiplies<size_t>());
    std::vector<uint16_t> input(count + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(count);
    std::vector<size_t> input_stride(input.size(), 1);
    std::vector<size_t> output_stride(input.size(), 1);
    for (size_t i = num_dims() - 1; i > 0; --i) {
      input_stride[i - 1] = input_stride[i] * shape_[i];
      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
    }
    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
    xnn_operator_t transpose_op = nullptr;
    std::iota(input.begin(), input.end(), 0);
    std::fill(output.begin(), output.end(), UINT16_C(0xDEAD));

    ASSERT_EQ(xnn_status_success,
              xnn_create_transpose_nd_x16(0 /* flags */, &transpose_op));
    ASSERT_NE(nullptr, transpose_op);

    // Smart pointer to automatically delete transpose op.
    std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_transpose_op(transpose_op, xnn_delete_operator);

    ASSERT_EQ(xnn_status_success,
              xnn_setup_transpose_nd_x16(
                  transpose_op,
                  input.data(), output.data(),
                  num_dims(), shape_.data(), perm_.data(),
                  nullptr /* thread pool */));

    // Run operator.
    ASSERT_EQ(xnn_status_success,
              xnn_run_operator(transpose_op, nullptr /* thread pool */));

    // Verify results.
    for (size_t i = 0; i < count; ++i) {
      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
      ASSERT_EQ(input[in_idx], output[i]);
    }
  }

  void TestRunX16() const {
    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), size_t(1), std::multiplies<size_t>());
    std::vector<uint16_t> input(count + XNN_EXTRA_BYTES / sizeof(uint16_t));
    std::vector<uint16_t> output(count);
    std::vector<size_t> input_stride(input.size(), 1);
    std::vector<size_t> output_stride(input.size(), 1);
    for (size_t i = num_dims() - 1; i > 0; --i) {
      input_stride[i - 1] = input_stride[i] * shape_[i];
      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
    }
    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
    std::iota(input.begin(), input.end(), 0);
    std::fill(output.begin(), output.end(), UINT16_C(0xDEAD));

    // Call the transpose eager API.
    ASSERT_EQ(xnn_status_success,
              xnn_run_transpose_nd_x16(
                  0 /* flags */,
                  input.data(), output.data(),
                  num_dims(), shape_.data(), perm_.data(),
                  nullptr /* thread pool */));

    // Verify results.
    for (size_t i = 0; i < count; ++i) {
      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
      ASSERT_EQ(input[in_idx], output[i]);
    }
  }

  void TestX32() const {
    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), size_t(1), std::multiplies<size_t>());
    std::vector<uint32_t> input(count + XNN_EXTRA_BYTES / sizeof(uint32_t));
    std::vector<uint32_t> output(count);
    std::vector<size_t> input_stride(input.size(), 1);
    std::vector<size_t> output_stride(input.size(), 1);
    for (size_t i = num_dims() - 1; i > 0; --i) {
      input_stride[i - 1] = input_stride[i] * shape_[i];
      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
    }
    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
    xnn_operator_t transpose_op = nullptr;
    std::iota(input.begin(), input.end(), 0);
    std::fill(output.begin(), output.end(), UINT32_C(0xDEADBEEF));

    ASSERT_EQ(xnn_status_success,
              xnn_create_transpose_nd_x32(0 /* flags */, &transpose_op));
    ASSERT_NE(nullptr, transpose_op);

    // Smart pointer to automatically delete transpose op.
    std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_transpose_op(transpose_op, xnn_delete_operator);

    ASSERT_EQ(xnn_status_success,
              xnn_setup_transpose_nd_x32(
                  transpose_op,
                  input.data(), output.data(),
                  num_dims(), shape_.data(), perm_.data(),
                  nullptr /* thread pool */));

    // Run operator.
    ASSERT_EQ(xnn_status_success,
              xnn_run_operator(transpose_op, nullptr /* thread pool */));

    // Verify results.
    for (size_t i = 0; i < count; ++i) {
      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
      ASSERT_EQ(input[in_idx], output[i]);
    }
  }

  void TestRunX32() const {
    const size_t count = std::accumulate(dims().cbegin(), dims().cend(), size_t(1), std::multiplies<size_t>());
    std::vector<uint32_t> input(count + XNN_EXTRA_BYTES / sizeof(uint32_t));
    std::vector<uint32_t> output(count);
    std::vector<size_t> input_stride(input.size(), 1);
    std::vector<size_t> output_stride(input.size(), 1);
    for (size_t i = num_dims() - 1; i > 0; --i) {
      input_stride[i - 1] = input_stride[i] * shape_[i];
      output_stride[i - 1] = output_stride[i] * shape_[perm()[i]];
    }
    ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
    std::iota(input.begin(), input.end(), 0);
    std::fill(output.begin(), output.end(), UINT32_C(0xDEADBEEF));

    // Call the transpose eager API.
    ASSERT_EQ(xnn_status_success,
              xnn_run_transpose_nd_x32(
                  0 /* flags */,
                  input.data(), output.data(),
                  num_dims(), shape_.data(), perm_.data(),
                  nullptr /* thread pool */));

    // Verify results.
    for (size_t i = 0; i < count; ++i) {
      const size_t in_idx = reference_index(input_stride.data(), output_stride.data(), perm_.data(), num_dims(), i);
      ASSERT_EQ(input[in_idx], output[i]);
    }
  }

 private:
  size_t num_dims_ = 1;
  std::vector<size_t> shape_;
  std::vector<size_t> perm_;
};
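
// A minimal usage sketch (hypothetical test case; names and values are
// illustrative, not part of this header):
//
//   TEST(TRANSPOSE_ND_X32, transpose_2d) {
//     TransposeOperatorTester()
//         .num_dims(2)
//         .shape({3, 5})
//         .perm({1, 0})
//         .TestX32();
//   }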