272 lines
9.7 KiB
C++
272 lines
9.7 KiB
C++
// Copyright 2021, VIXL authors
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
// * Neither the name of ARM Limited nor the names of its contributors may be
|
|
// used to endorse or promote products derived from this software without
|
|
// specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
|
|
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
|
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include "test-runner.h"
|
|
#include "test-utils.h"
|
|
#include "aarch64/test-utils-aarch64.h"
|
|
|
|
#include "aarch64/cpu-aarch64.h"
|
|
#include "aarch64/disasm-aarch64.h"
|
|
#include "aarch64/macro-assembler-aarch64.h"
|
|
#include "aarch64/simulator-aarch64.h"
|
|
#include "test-assembler-aarch64.h"
|
|
|
|
// Declares an SVE test called `name` by forwarding to TEST_SVE_INNER with the
// string "SIM" as its first argument.
// NOTE(review): "SIM" presumably tags the test as simulator-only; confirm
// against the TEST_SVE_INNER definition, which is not part of this file.
#define TEST_SVE(name) TEST_SVE_INNER("SIM", name)
|
|
|
|
namespace vixl {
|
|
namespace aarch64 {
|
|
|
|
// State-hash test for the SVE integer matrix multiply-accumulate instructions
// (smmla, ummla and usmmla, enabled here through CPUFeatures::kSVEI8MM).
//
// The test sets up an initial machine state, emits a fixed sequence of 50 raw
// instruction encodings, then hashes the resulting machine state and compares
// the hash with a per-vector-length expected value.
//
// The `vl128 state` comments record the hash after each instruction for a
// 128-bit vector length; the last one (0x0aa6e1f6) is the first entry of
// `expected_hashes`.
TEST_SVE(sve_matmul) {
  // NOTE(review): kNEON and kCRC32 are not needed by the instructions under
  // test; presumably they are required by the state set-up/hash helpers.
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVEI8MM,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size must match the number of `dci` calls below (50).
    ExactAssemblyScope scope(&masm, 50 * kInstructionSize);
    __ dci(0x45179979);  // smmla z25.s, z11.b, z23.b
    // vl128 state = 0xf1ca8a4d
    __ dci(0x45179b51);  // smmla z17.s, z26.b, z23.b
    // vl128 state = 0x4458ad10
    __ dci(0x45d79b53);  // ummla z19.s, z26.b, z23.b
    // vl128 state = 0x43d4d064
    __ dci(0x45d69b17);  // ummla z23.s, z24.b, z22.b
    // vl128 state = 0x601e77c8
    __ dci(0x45c69b33);  // ummla z19.s, z25.b, z6.b
    // vl128 state = 0x561b4e22
    __ dci(0x45c49b1b);  // ummla z27.s, z24.b, z4.b
    // vl128 state = 0x89b65d78
    __ dci(0x45dc9b1a);  // ummla z26.s, z24.b, z28.b
    // vl128 state = 0x85c9e62d
    __ dci(0x45d99b1b);  // ummla z27.s, z24.b, z25.b
    // vl128 state = 0x3fc74134
    __ dci(0x45d99b19);  // ummla z25.s, z24.b, z25.b
    // vl128 state = 0xa2fa347b
    __ dci(0x45d99b1b);  // ummla z27.s, z24.b, z25.b
    // vl128 state = 0xb9854782
    __ dci(0x45899b1a);  // usmmla z26.s, z24.b, z9.b
    // vl128 state = 0x7fd376d8
    __ dci(0x45099b8a);  // smmla z10.s, z28.b, z9.b
    // vl128 state = 0xb41d8433
    __ dci(0x45019bcb);  // smmla z11.s, z30.b, z1.b
    // vl128 state = 0xc9c0e80d
    __ dci(0x45019bdb);  // smmla z27.s, z30.b, z1.b
    // vl128 state = 0xf1130e02
    __ dci(0x45019b6b);  // smmla z11.s, z27.b, z1.b
    // vl128 state = 0x282d3dc7
    __ dci(0x45019b6f);  // smmla z15.s, z27.b, z1.b
    // vl128 state = 0x34570238
    __ dci(0x45859b6b);  // usmmla z11.s, z27.b, z5.b
    // vl128 state = 0xc451206a
    __ dci(0x45919b6a);  // usmmla z10.s, z27.b, z17.b
    // vl128 state = 0xa58e2ea8
    __ dci(0x45909a62);  // usmmla z2.s, z19.b, z16.b
    // vl128 state = 0x7b5f948d
    __ dci(0x45809a52);  // usmmla z18.s, z18.b, z0.b
    // vl128 state = 0xf746260d
    __ dci(0x45889b53);  // usmmla z19.s, z26.b, z8.b
    // vl128 state = 0xc31cc539
    __ dci(0x45809a57);  // usmmla z23.s, z18.b, z0.b
    // vl128 state = 0x736bb3ee
    __ dci(0x45809a96);  // usmmla z22.s, z20.b, z0.b
    // vl128 state = 0xbb05fef6
    __ dci(0x45809a92);  // usmmla z18.s, z20.b, z0.b
    // vl128 state = 0xbc594372
    __ dci(0x45809a82);  // usmmla z2.s, z20.b, z0.b
    // vl128 state = 0x87c5a584
    __ dci(0x45829ad2);  // usmmla z18.s, z22.b, z2.b
    // vl128 state = 0xa413f733
    __ dci(0x45889ad6);  // usmmla z22.s, z22.b, z8.b
    // vl128 state = 0x87ec445d
    __ dci(0x45c898d2);  // ummla z18.s, z6.b, z8.b
    // vl128 state = 0x3ca8a6e5
    __ dci(0x450898d0);  // smmla z16.s, z6.b, z8.b
    // vl128 state = 0x4300d87b
    __ dci(0x45189ad8);  // smmla z24.s, z22.b, z24.b
    // vl128 state = 0x38be2e8a
    __ dci(0x451c9bd9);  // smmla z25.s, z30.b, z28.b
    // vl128 state = 0x8a3e6103
    __ dci(0x45989bc9);  // usmmla z9.s, z30.b, z24.b
    // vl128 state = 0xc728e586
    __ dci(0x451c9bd9);  // smmla z25.s, z30.b, z28.b
    // vl128 state = 0x4cb44c0e
    __ dci(0x459c99d1);  // usmmla z17.s, z14.b, z28.b
    // vl128 state = 0x84ebcb36
    __ dci(0x459c99d5);  // usmmla z21.s, z14.b, z28.b
    // vl128 state = 0x8813d2e2
    __ dci(0x451c999d);  // smmla z29.s, z12.b, z28.b
    // vl128 state = 0x8f26ee51
    __ dci(0x451c999f);  // smmla z31.s, z12.b, z28.b
    // vl128 state = 0x5d626fd0
    __ dci(0x459e998f);  // usmmla z15.s, z12.b, z30.b
    // vl128 state = 0x6b64cc8f
    __ dci(0x459f991f);  // usmmla z31.s, z8.b, z31.b
    // vl128 state = 0x41648186
    __ dci(0x4587991e);  // usmmla z30.s, z8.b, z7.b
    // vl128 state = 0x701525ec
    __ dci(0x45079816);  // smmla z22.s, z0.b, z7.b
    // vl128 state = 0x61a2d024
    __ dci(0x450f9897);  // smmla z23.s, z4.b, z15.b
    // vl128 state = 0x82ba6bd5
    __ dci(0x450b98d3);  // smmla z19.s, z6.b, z11.b
    // vl128 state = 0xa842bbde
    __ dci(0x450b98db);  // smmla z27.s, z6.b, z11.b
    // vl128 state = 0x9977677a
    __ dci(0x451f98d3);  // smmla z19.s, z6.b, z31.b
    // vl128 state = 0xe6d6c2ef
    __ dci(0x451b9adb);  // smmla z27.s, z22.b, z27.b
    // vl128 state = 0xa535453f
    __ dci(0x450b98d9);  // smmla z25.s, z6.b, z11.b
    // vl128 state = 0xeda3f381
    __ dci(0x458b9adb);  // usmmla z27.s, z22.b, z11.b
    // vl128 state = 0xd72dbdef
    __ dci(0x45cb98da);  // ummla z26.s, z6.b, z11.b
    // vl128 state = 0xfae4975b
    __ dci(0x45c999d2);  // ummla z18.s, z14.b, z9.b
    // vl128 state = 0x0aa6e1f6
  }

  // `state` is a host-side variable whose address is baked into the generated
  // code: the hash helper is handed `&state`, and the Mov/Ldr pair below
  // loads its value into w0 so that it can be checked after the run.
  // NOTE(review): presumably the code emitted by ComputeMachineStateHash
  // stores the final hash to `state` at run time; confirm in the helper.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per vector length, indexed by the number of
    // Q-register-sized lanes minus one. Entry 0 is the vl128 hash recorded
    // after the last instruction above.
    uint32_t expected_hashes[] = {
        0x0aa6e1f6,
        0xba2d4547,
        0x0e72a647,
        0x15b8fc1b,
        0x92eddc98,
        0xe0c72bcf,
        0x36b4e3ba,
        0x1041114e,
        0x4d44ebd4,
        0xfe0e3cbf,
        0x81c43455,
        0x678617c5,
        0xf72fac1f,
        0xabdcd4e4,
        0x108864bd,
        0x035f6eca,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
|
|
|
|
// State-hash test for the SVE single-precision floating-point matrix
// multiply-accumulate instruction (fmmla with .s lanes, enabled here through
// CPUFeatures::kSVEF32MM).
//
// The test sets up an initial machine state, emits a fixed sequence of 20 raw
// instruction encodings, then hashes the resulting machine state and compares
// the hash with a per-vector-length expected value.
//
// The `vl128 state` comments record the hash after each instruction for a
// 128-bit vector length; the last one (0x98a55a30) is the first entry of
// `expected_hashes`.
TEST_SVE(sve_fmatmul_s) {
  // NOTE(review): kNEON and kCRC32 are not needed by the instruction under
  // test; presumably they are required by the state set-up/hash helpers.
  SVE_SETUP_WITH_FEATURES(CPUFeatures::kSVE,
                          CPUFeatures::kSVEF32MM,
                          CPUFeatures::kNEON,
                          CPUFeatures::kCRC32);
  START();

  SetInitialMachineState(&masm);
  // state = 0xe2bd2480

  {
    // The scope size must match the number of `dci` calls below (20).
    ExactAssemblyScope scope(&masm, 20 * kInstructionSize);
    __ dci(0x64a1e6ee);  // fmmla z14.s, z23.s, z1.s
    // vl128 state = 0x9db41bef
    __ dci(0x64b1e7fe);  // fmmla z30.s, z31.s, z17.s
    // vl128 state = 0xc1535e55
    __ dci(0x64b9e7d6);  // fmmla z22.s, z30.s, z25.s
    // vl128 state = 0xc65aad35
    __ dci(0x64bde6c6);  // fmmla z6.s, z22.s, z29.s
    // vl128 state = 0x68387c22
    __ dci(0x64b9e4c2);  // fmmla z2.s, z6.s, z25.s
    // vl128 state = 0xcf08b3a4
    __ dci(0x64b9e543);  // fmmla z3.s, z10.s, z25.s
    // vl128 state = 0x969bbe77
    __ dci(0x64b9e553);  // fmmla z19.s, z10.s, z25.s
    // vl128 state = 0xc3f514e1
    __ dci(0x64b9e557);  // fmmla z23.s, z10.s, z25.s
    // vl128 state = 0x4b351c29
    __ dci(0x64b9e773);  // fmmla z19.s, z27.s, z25.s
    // vl128 state = 0x5e026315
    __ dci(0x64bbe757);  // fmmla z23.s, z26.s, z27.s
    // vl128 state = 0x61684fe6
    __ dci(0x64bbe755);  // fmmla z21.s, z26.s, z27.s
    // vl128 state = 0x719b4ce0
    __ dci(0x64bfe554);  // fmmla z20.s, z10.s, z31.s
    // vl128 state = 0xdf3d2a1c
    __ dci(0x64bfe550);  // fmmla z16.s, z10.s, z31.s
    // vl128 state = 0x3279aab8
    __ dci(0x64bfe714);  // fmmla z20.s, z24.s, z31.s
    // vl128 state = 0x0b985869
    __ dci(0x64b7e756);  // fmmla z22.s, z26.s, z23.s
    // vl128 state = 0x14230587
    __ dci(0x64b7e737);  // fmmla z23.s, z25.s, z23.s
    // vl128 state = 0x2cb88e7f
    __ dci(0x64bfe767);  // fmmla z7.s, z27.s, z31.s
    // vl128 state = 0xb5ec0c65
    __ dci(0x64bfe777);  // fmmla z23.s, z27.s, z31.s
    // vl128 state = 0xb5e5eab0
    __ dci(0x64bfe715);  // fmmla z21.s, z24.s, z31.s
    // vl128 state = 0xd0491fb5
    __ dci(0x64b7e797);  // fmmla z23.s, z28.s, z23.s
    // vl128 state = 0x98a55a30
  }

  // `state` is a host-side variable whose address is baked into the generated
  // code: the hash helper is handed `&state`, and the Mov/Ldr pair below
  // loads its value into w0 so that it can be checked after the run.
  // NOTE(review): presumably the code emitted by ComputeMachineStateHash
  // stores the final hash to `state` at run time; confirm in the helper.
  uint32_t state;
  ComputeMachineStateHash(&masm, &state);
  __ Mov(x0, reinterpret_cast<uint64_t>(&state));
  __ Ldr(w0, MemOperand(x0));

  END();
  if (CAN_RUN()) {
    RUN();
    // One expected hash per vector length, indexed by the number of
    // Q-register-sized lanes minus one. Entry 0 is the vl128 hash recorded
    // after the last instruction above.
    uint32_t expected_hashes[] = {
        0x98a55a30,
        0x590b7715,
        0x4562ccf3,
        0x1f8653a6,
        0x5fe174d5,
        0xb300dcb8,
        0x3cefa79e,
        0xa22484c7,
        0x380697ec,
        0xde9e699b,
        0x99d21870,
        0x456cb46b,
        0x207d2615,
        0xecaf9678,
        0x0949e2d2,
        0xa764c43f,
    };
    ASSERT_EQUAL_64(expected_hashes[core.GetSVELaneCount(kQRegSize) - 1], x0);
  }
}
|
|
|
|
} // namespace aarch64
|
|
} // namespace vixl
|