ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > > Struct Template Reference

enable_if_t< WaveSize==32||WaveSize==64 > > Struct Template Reference

Composable Kernel: ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > > Struct Template Reference
ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > > Struct Template Reference

#include <wmma_gemm.hpp>

Public Member Functions

template<index_t MPerWmma, index_t NPerWmma, class FloatA, class FloatB, class FloatC>
__device__ void run (const FloatA &a, const FloatB &b, FloatC &reg_c) const

Static Public Attributes

static constexpr index_t m_per_wmma = 16
static constexpr index_t n_per_wmma = 16
static constexpr index_t k_per_wmma = 16
static constexpr index_t src_a_data_size = 2
static constexpr index_t src_b_data_size = 2
static constexpr index_t acc_data_size = 4
static constexpr index_t acc_pack_number = 1
static constexpr index_t num_thread_per_subgroups = n_per_wmma
static constexpr index_t wave_size = Number<WaveSize>{}
static constexpr index_t num_src_a_vgprs_per_wave = m_per_wmma * src_a_data_size / 4
static constexpr index_t num_src_b_vgprs_per_wave = n_per_wmma * src_b_data_size / 4
static constexpr index_t num_acc_vgprs_per_wave
static constexpr index_t num_subgroups = wave_size / num_thread_per_subgroups

Member Function Documentation

◆ run()

template<index_t WaveSize>
template<index_t MPerWmma, index_t NPerWmma, class FloatA, class FloatB, class FloatC>
__device__ void ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::run ( const FloatA & a,
const FloatB & b,
FloatC & reg_c ) const
inline

Member Data Documentation

◆ acc_data_size

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::acc_data_size = 4
static constexpr

◆ acc_pack_number

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::acc_pack_number = 1
static constexpr

◆ k_per_wmma

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::k_per_wmma = 16
static constexpr

◆ m_per_wmma

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::m_per_wmma = 16
static constexpr

◆ n_per_wmma

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::n_per_wmma = 16
static constexpr

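◆ num_acc_vgprs_per_wave

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::num_acc_vgprs_per_wave
static constexpr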

◆ num_src_a_vgprs_per_wave

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::num_src_a_vgprs_per_wave = m_per_wmma * src_a_data_size / 4
static constexpr

◆ num_src_b_vgprs_per_wave

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::num_src_b_vgprs_per_wave = n_per_wmma * src_b_data_size / 4
static constexpr

◆ num_subgroups

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::num_subgroups = wave_size / num_thread_per_subgroups
static constexpr

◆ num_thread_per_subgroups

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::num_thread_per_subgroups = n_per_wmma
static constexpr

◆ src_a_data_size

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::src_a_data_size = 2
static constexpr

◆ src_b_data_size

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::src_b_data_size = 2
static constexpr

◆ wave_size

template<index_t WaveSize>
index_t ck::wmma_type< WmmaInstr::wmma_f32_16x16x16_f16, WaveSize, typename std::enable_if_t< WaveSize==32||WaveSize==64 > >::wave_size = Number<WaveSize>{}
static constexpr

The documentation for this struct was generated from the following file: wmma_gemm.hpp