unplugged-system/prebuilts/clang/host/linux-x86/clang-r450784e/include/polly/MatmulOptimizer.h

//===- MatmulOptimizer.h -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef POLLY_MATMULOPTIMIZER_H
#define POLLY_MATMULOPTIMIZER_H

#include "isl/isl-noexceptions.h"

namespace llvm {
class TargetTransformInfo;
}

namespace polly {
struct Dependences;

/// Apply the BLIS matmul optimization pattern if possible.
///
/// Make the loops containing the matrix multiplication be the innermost
/// loops and apply the BLIS matmul optimization pattern. BLIS implements
/// gemm as three nested loops around a macro-kernel, plus two packing
/// routines. The macro-kernel is implemented in terms of two additional
/// loops around a micro-kernel. The micro-kernel is a loop around a rank-1
/// (i.e., outer product) update.
///
/// For a detailed description please see [1].
///
/// The order of the loops defines the data reused in the BLIS implementation
/// of gemm ([1]). In particular, elements of the matrix B, the second
/// operand of matrix multiplication, are reused between iterations of the
/// innermost loop. To keep the reused data in cache, only elements of matrix
/// A, the first operand of matrix multiplication, should be evicted during
/// an iteration of the innermost loop. To provide such a cache replacement
/// policy, elements of the matrix A can, in particular, be loaded first and,
/// consequently, be least-recently-used.
///
/// In our case matrices are stored in row-major order instead of
/// column-major order used in the BLIS implementation ([1]). It affects only
/// on the form of the BLIS micro kernel and the computation of its
/// parameters. In particular, reused elements of the matrix B are
/// successively multiplied by specific elements of the matrix A.
///
/// Refs.:
/// [1] - Analytical Modeling is Enough for High Performance BLIS
/// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti
/// Technical Report, 2014
/// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf
///
/// @see ScheduleTreeOptimizer::createMicroKernel
/// @see ScheduleTreeOptimizer::createMacroKernel
/// @see getMicroKernelParams
/// @see getMacroKernelParams
///
/// TODO: Implement the packing transformation.
///
/// @param Node The node that contains a band to be optimized. The node
///             is required to successfully pass
///             ScheduleTreeOptimizer::isMatrMultPattern.
/// @param TTI  Target Transform Info.
/// @param D    The dependencies.
///
/// @returns    The transformed schedule or nullptr if the optimization
///             cannot be applied.
isl::schedule_node
tryOptimizeMatMulPattern(isl::schedule_node Node,
                         const llvm::TargetTransformInfo *TTI,
                         const Dependences *D);

} // namespace polly
#endif // POLLY_MATMULOPTIMIZER_H
Initial commit: AOSP 14 with modifications for Unplugged OS 2025-10-06 13:59:42 +00:00			`//===- MatmulOptimizer.h -------------------------------------------------===//`
			`//`
			`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
			`// See https://llvm.org/LICENSE.txt for license information.`
			`// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception`
			`//`
			`//===----------------------------------------------------------------------===//`

			`#ifndef POLLY_MATMULOPTIMIZER_H`
			`#define POLLY_MATMULOPTIMIZER_H`

			`#include "isl/isl-noexceptions.h"`

			`namespace llvm {`
			`class TargetTransformInfo;`
			`}`

			`namespace polly {`
			`struct Dependences;`

			`/// Apply the BLIS matmul optimization pattern if possible.`
			`///`
			`/// Make the loops containing the matrix multiplication be the innermost`
			`/// loops and apply the BLIS matmul optimization pattern. BLIS implements`
			`/// gemm as three nested loops around a macro-kernel, plus two packing`
			`/// routines. The macro-kernel is implemented in terms of two additional`
			`/// loops around a micro-kernel. The micro-kernel is a loop around a rank-1`
			`/// (i.e., outer product) update.`
			`///`
			`/// For a detailed description please see [1].`
			`///`
			`/// The order of the loops defines the data reused in the BLIS implementation`
			`/// of gemm ([1]). In particular, elements of the matrix B, the second`
			`/// operand of matrix multiplication, are reused between iterations of the`
			`/// innermost loop. To keep the reused data in cache, only elements of matrix`
			`/// A, the first operand of matrix multiplication, should be evicted during`
			`/// an iteration of the innermost loop. To provide such a cache replacement`
			`/// policy, elements of the matrix A can, in particular, be loaded first and,`
			`/// consequently, be least-recently-used.`
			`///`
			`/// In our case matrices are stored in row-major order instead of`
			`/// column-major order used in the BLIS implementation ([1]). It affects only`
			`/// on the form of the BLIS micro kernel and the computation of its`
			`/// parameters. In particular, reused elements of the matrix B are`
			`/// successively multiplied by specific elements of the matrix A.`
			`///`
			`/// Refs.:`
			`/// [1] - Analytical Modeling is Enough for High Performance BLIS`
			`/// Tze Meng Low, Francisco D Igual, Tyler M Smith, Enrique S Quintana-Orti`
			`/// Technical Report, 2014`
			`/// http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf`
			`///`
			`/// @see ScheduleTreeOptimizer::createMicroKernel`
			`/// @see ScheduleTreeOptimizer::createMacroKernel`
			`/// @see getMicroKernelParams`
			`/// @see getMacroKernelParams`
			`///`
			`/// TODO: Implement the packing transformation.`
			`///`
			`/// @param Node The node that contains a band to be optimized. The node`
			`/// is required to successfully pass`
			`/// ScheduleTreeOptimizer::isMatrMultPattern.`
			`/// @param TTI Target Transform Info.`
			`/// @param D The dependencies.`
			`///`
			`/// @returns The transformed schedule or nullptr if the optimization`
			`/// cannot be applied.`
			`isl::schedule_node`
			`tryOptimizeMatMulPattern(isl::schedule_node Node,`
			`const llvm::TargetTransformInfo *TTI,`
			`const Dependences *D);`

			`} // namespace polly`
			`#endif // POLLY_MATMULOPTIMIZER_H`