ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 > Struct Reference

ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 > Struct Reference

Composable Kernel: ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 > Struct Reference
ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 > Struct Reference

#include <xdlops_gemm.hpp>

Public Member Functions

template<index_t MPerXdlops, index_t NPerXdlops, class FloatA, class FloatB, class FloatC>
__device__ void run (const FloatA &a, const FloatB &b, FloatC &reg_c) const

Static Public Attributes

static constexpr index_t group_size = 4
static constexpr index_t num_groups_per_blk = 4
static constexpr index_t num_regs_per_blk = 16
static constexpr index_t num_threads_per_blk = 32
static constexpr index_t wave_size = 64
static constexpr index_t num_input_blks = 2
static constexpr index_t num_output_blks = 1
static constexpr index_t m_per_blk = 32
static constexpr index_t n_per_blk = 32
static constexpr index_t k_per_blk = 8
static constexpr bool is_k_reduction = true

Member Function Documentation

◆ run()

template<index_t MPerXdlops, index_t NPerXdlops, class FloatA, class FloatB, class FloatC>
__device__ void ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::run ( const FloatA & a,
const FloatB & b,
FloatC & reg_c ) const
inline

Member Data Documentation

◆ group_size

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::group_size = 4
static constexpr

◆ is_k_reduction

bool ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::is_k_reduction = true
static constexpr

◆ k_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::k_per_blk = 8
static constexpr

◆ m_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::m_per_blk = 32
static constexpr

◆ n_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::n_per_blk = 32
static constexpr

◆ num_groups_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::num_groups_per_blk = 4
static constexpr

◆ num_input_blks

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::num_input_blks = 2
static constexpr

◆ num_output_blks

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::num_output_blks = 1
static constexpr

◆ num_regs_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::num_regs_per_blk = 16
static constexpr

◆ num_threads_per_blk

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::num_threads_per_blk = 32
static constexpr

◆ wave_size

index_t ck::mfma_type< MfmaInstr::mfma_i32_32x32x16i8 >::wave_size = 64
static constexpr

The documentation for this struct was generated from the following file: