GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer > Struct Template Reference#
ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer > Struct Template Reference
#include <gridwise_gemm_xdlops_v3r3.hpp>
Static Public Member Functions | |
| __host__ static __device__ constexpr auto | GetABlockDescriptor_K0PerBlock_MPerBlock_K1 () |
| __host__ static __device__ constexpr auto | GetBBlockDescriptor_K0PerBlock_NPerBlock_K1 () |
| __host__ static __device__ constexpr auto | GetCBlockDescriptor_MBlock_NXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl () |
| __host__ static __device__ constexpr index_t | GetSharedMemoryNumberOfByte () |
| template<InMemoryDataOperationEnum CGlobalMemoryDataOperation_ = InMemoryDataOperationEnum::Set> | |
| static __device__ bool constexpr | IsValidCompilationParameter () |
| template<typename Block2CTileMap> | |
| __host__ static __device__ constexpr bool | CheckValidity (const AGridDesc_K0_M_K1 &a_grid_desc_k0_m_k1, const BGridDesc_K0_N_K1 &b_grid_desc_k0_n_k1, const CGridDesc_M_N &c_grid_desc_m_n, const Block2CTileMap &block_2_ctile_map) |
| __host__ static __device__ constexpr bool | CalculateHasMainKBlockLoop (index_t K) |
| template<typename CGridDesc_M_N_> | |
| __host__ static __device__ constexpr auto | MakeCGridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl (const CGridDesc_M_N_ &c_grid_desc_m_n) |
| __host__ static __device__ constexpr auto | MakeDefaultBlock2CTileMap (const CGridDesc_M_N &c_grid_desc_m_n, index_t, index_t) |
| template<bool HasMainKBlockLoop, typename Block2CTileMap> | |
| static __device__ void | Run (const FloatAB *__restrict__ p_a_grid, const FloatAB *__restrict__ p_b_grid, FloatC *__restrict__ p_c_grid, const FloatC *__restrict__ p_c0_grid, const FloatC *__restrict__ p_c1_grid, void *__restrict__ p_shared, const AGridDesc_K0_M_K1 &a_grid_desc_k0_m_k1, const BGridDesc_K0_N_K1 &b_grid_desc_k0_n_k1, const CGridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl &c_grid_desc_mblock_mxdlperwave_mwavemperxdl_nblock_nxdlperwave_nwavenperxdl, const C0GridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl &c0_grid_desc_mblock_mxdlperwave_mwavemperxdl_nblock_nxdlperwave_nwavenperxdl, const C1GridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl &c1_grid_desc_mblock_mxdlperwave_mwavemperxdl_nblock_nxdlperwave_nwavenperxdl, const AElementwiseOperation &a_element_op, const BElementwiseOperation &b_element_op, const CElementwiseOperation &c_element_op, const Block2CTileMap &block_2_ctile_map) |
Static Public Attributes | |
| static constexpr auto | I0 = Number<0>{} |
| static constexpr auto | I1 = Number<1>{} |
| static constexpr auto | I2 = Number<2>{} |
| static constexpr auto | I3 = Number<3>{} |
| static constexpr auto | I4 = Number<4>{} |
| static constexpr auto | I5 = Number<5>{} |
| static constexpr auto | I6 = Number<6>{} |
| static constexpr auto | I7 = Number<7>{} |
| static constexpr auto | K1 = Number<K1Value>{} |
Member Typedef Documentation
◆ C0GridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
| using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer >::C0GridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl |
Initial value:
C0GridDesc_M_N{}))>
__host__ static __device__ constexpr auto MakeCGridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl(const CGridDesc_M_N_ &c_grid_desc_m_n)
Definition gridwise_gemm_xdlops_v3r3.hpp:320
◆ C1GridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
| using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer >::C1GridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl |
Initial value:
C1GridDesc_M_N{}))>
◆ CGridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
| using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer >::CGridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl |
Initial value:
CGridDesc_M_N{}))>
◆ DefaultBlock2CTileMap
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
| using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer >::DefaultBlock2CTileMap |
Initial value:
__host__ static __device__ constexpr auto MakeDefaultBlock2CTileMap(const CGridDesc_M_N &c_grid_desc_m_n, index_t, index_t)
Definition gridwise_gemm_xdlops_v3r3.hpp:347
◆ GridwiseGemmPipe
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
| using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer >::GridwiseGemmPipe |
Initial value:
constexpr auto GridwiseGemmPipeline_Selector()
Definition gridwise_gemm_pipeline_selector.hpp:31
◆ ThisThreadBlock
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
| using ck::GridwiseGemm_k0mk1_k0nk1_mn_xdlops_v3r3< BlockSize, FloatAB, FloatAcc, FloatC, CGlobalMemoryDataOperation, AGridDesc_K0_M_K1, BGridDesc_K0_N_K1, CGridDesc_M_N, C0GridDesc_M_N, C1GridDesc_M_N, AElementwiseOperation, BElementwiseOperation, CElementwiseOperation, MPerBlock, NPerBlock, K0PerBlock, MPerXdl, NPerXdl, K1Value, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, AThreadTransferSrcResetCoordinateAfterRun, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, BThreadTransferSrcResetCoordinateAfterRun, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, CBlockTransferScalarPerVector_NWaveNPerXdl, NumGemmKPrefetchStage, PipelineVer >::ThisThreadBlock = ThisThreadBlock<BlockSize> |
Member Function Documentation
◆ CalculateHasMainKBlockLoop()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ CheckValidity()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
template<typename Block2CTileMap>
|
inlinestaticconstexpr |
◆ GetABlockDescriptor_K0PerBlock_MPerBlock_K1()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ GetBBlockDescriptor_K0PerBlock_NPerBlock_K1()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ GetCBlockDescriptor_MBlock_NXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ GetSharedMemoryNumberOfByte()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ IsValidCompilationParameter()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
template<InMemoryDataOperationEnum CGlobalMemoryDataOperation_ = InMemoryDataOperationEnum::Set>
|
inlinestaticconstexpr |
◆ MakeCGridDescriptor_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
template<typename CGridDesc_M_N_>
|
inlinestaticconstexpr |
◆ MakeDefaultBlock2CTileMap()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
inlinestaticconstexpr |
◆ Run()
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
template<bool HasMainKBlockLoop, typename Block2CTileMap>
|
inlinestatic |
Member Data Documentation
◆ I0
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I1
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I2
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I3
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I4
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I5
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I6
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ I7
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
◆ K1
template<index_t BlockSize, typename FloatAB, typename FloatAcc, typename FloatC, InMemoryDataOperationEnum CGlobalMemoryDataOperation, typename AGridDesc_K0_M_K1, typename BGridDesc_K0_N_K1, typename CGridDesc_M_N, typename C0GridDesc_M_N, typename C1GridDesc_M_N, typename AElementwiseOperation, typename BElementwiseOperation, typename CElementwiseOperation, index_t MPerBlock, index_t NPerBlock, index_t K0PerBlock, index_t MPerXdl, index_t NPerXdl, index_t K1Value, index_t MXdlPerWave, index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_K0_M_K1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_K1, bool AThreadTransferSrcResetCoordinateAfterRun, bool ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_K0_N_K1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_K1, bool BThreadTransferSrcResetCoordinateAfterRun, bool BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CBlockTransferClusterLengths_MBlock_MXdlPerWave_MWaveMPerXdl_NBlock_NXdlPerWave_NWaveNPerXdl, index_t CBlockTransferScalarPerVector_NWaveNPerXdl, index_t NumGemmKPrefetchStage = 1, PipelineVersion PipelineVer = PipelineVersion::v1>
|
staticconstexpr |
The documentation for this struct was generated from the following file: