Argument Struct Reference
ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument Struct Reference
#include <device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp>
Inheritance diagram for ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument:
Public Member Functions | |
| Argument (const A0DataType *p_a0_grid, const B0DataType *p_b0_grid, std::array< const void *, NumD0Tensor > p_d0s_grid, const B1DataType *p_b1_grid, std::array< const void *, NumD1Tensor > p_d1s_grid, E1DataType *p_e1_grid, index_t MRaw, index_t NRaw, index_t KRaw, index_t Gemm1NRaw, index_t Batch, index_t StrideA0, index_t StrideB0, std::array< index_t, NumD0Tensor > StrideD0s, index_t StrideB1, std::array< index_t, NumD1Tensor > StrideD1s, index_t StrideE1, index_t BatchStrideA0, index_t BatchStrideB0, std::array< index_t, NumD0Tensor > BatchStrideD0s, index_t BatchStrideB1, std::array< index_t, NumD1Tensor > BatchStrideD1s, index_t BatchStrideE1, A0ElementwiseOperation a0_element_op, B0ElementwiseOperation b0_element_op, CDE0ElementwiseOperation cde0_element_op, B1ElementwiseOperation b1_element_op, CDE1ElementwiseOperation cde1_element_op) | |
| Public Member Functions inherited from ck::tensor_operation::device::BaseArgument | |
| BaseArgument ()=default | |
| BaseArgument (const BaseArgument &)=default | |
| BaseArgument & | operator= (const BaseArgument &)=default |
| virtual | ~BaseArgument () |
Public Attributes | |
| const A0DataType * | p_a0_grid_ |
| const B0DataType * | p_b0_grid_ |
| GridwiseGemm64::D0sGridPointer | p_d0s_grid_ |
| const B1DataType * | p_b1_grid_ |
| GridwiseGemm64::D1sGridPointer | p_d1s_grid_ |
| E1DataType * | p_e1_grid_ |
| A0GridDesc_M_K | a0_grid_desc_m_k_ |
| B0GridDesc_N_K | b0_grid_desc_n_k_ |
| D0sGridDesc_M_N | d0s_grid_desc_m_n_ |
| B1GridDesc_N_K | b1_grid_desc_n_k_ |
| D1sGridDesc_M_N | d1s_grid_desc_m_n_ |
| E1GridDesc_M_N | e1_grid_desc_m_n_ |
| A0GridDesc_AK0_M_AK1 | a0_grid_desc_ak0_m_ak1_ |
| B0GridDesc_BK0_N_BK1 | b0_grid_desc_bk0_n_bk1_ |
| B1GridDesc_BK0_N_BK1 | b1_grid_desc_bk0_n_bk1_ |
| GridwiseGemm64::DefaultBlock2E1TileMap | block_2_e1tile_map_ |
| A0ElementwiseOperation | a0_element_op_ |
| B0ElementwiseOperation | b0_element_op_ |
| CDE0ElementwiseOperation | cde0_element_op_ |
| B1ElementwiseOperation | b1_element_op_ |
| CDE1ElementwiseOperation | cde1_element_op_ |
| index_t | batch_count_ |
| ComputeBasePtrOfStridedBatch | compute_base_ptr_of_batch_ |
| Public Attributes inherited from ck::tensor_operation::device::BaseArgument | |
| void * | p_workspace_ = nullptr |
Constructor & Destructor Documentation
◆ Argument()
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
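| ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::Argument (const A0DataType *p_a0_grid, const B0DataType *p_b0_grid, std::array< const void *, NumD0Tensor > p_d0s_grid, const B1DataType *p_b1_grid, std::array< const void *, NumD1Tensor > p_d1s_grid, E1DataType *p_e1_grid, index_t MRaw, index_t NRaw, index_t KRaw, index_t Gemm1NRaw, index_t Batch, index_t StrideA0, index_t StrideB0, std::array< index_t, NumD0Tensor > StrideD0s, index_t StrideB1, std::array< index_t, NumD1Tensor > StrideD1s, index_t StrideE1, index_t BatchStrideA0, index_t BatchStrideB0, std::array< index_t, NumD0Tensor > BatchStrideD0s, index_t BatchStrideB1, std::array< index_t, NumD1Tensor > BatchStrideD1s, index_t BatchStrideE1, A0ElementwiseOperation a0_element_op, B0ElementwiseOperation b0_element_op, CDE0ElementwiseOperation cde0_element_op, B1ElementwiseOperation b1_element_op, CDE1ElementwiseOperation cde1_element_op) | inline |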
Member Data Documentation
◆ a0_element_op_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| A0ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::a0_element_op_ |
◆ a0_grid_desc_ak0_m_ak1_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| A0GridDesc_AK0_M_AK1 ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::a0_grid_desc_ak0_m_ak1_ |
◆ a0_grid_desc_m_k_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| A0GridDesc_M_K ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::a0_grid_desc_m_k_ |
◆ b0_element_op_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| B0ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::b0_element_op_ |
◆ b0_grid_desc_bk0_n_bk1_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| B0GridDesc_BK0_N_BK1 ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::b0_grid_desc_bk0_n_bk1_ |
◆ b0_grid_desc_n_k_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| B0GridDesc_N_K ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::b0_grid_desc_n_k_ |
◆ b1_element_op_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| B1ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::b1_element_op_ |
◆ b1_grid_desc_bk0_n_bk1_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| B1GridDesc_BK0_N_BK1 ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::b1_grid_desc_bk0_n_bk1_ |
◆ b1_grid_desc_n_k_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| B1GridDesc_N_K ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::b1_grid_desc_n_k_ |
◆ batch_count_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| index_t ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::batch_count_ |
◆ block_2_e1tile_map_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| GridwiseGemm64::DefaultBlock2E1TileMap ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::block_2_e1tile_map_ |
◆ cde0_element_op_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| CDE0ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::cde0_element_op_ |
◆ cde1_element_op_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| CDE1ElementwiseOperation ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::cde1_element_op_ |
◆ compute_base_ptr_of_batch_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| ComputeBasePtrOfStridedBatch ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::compute_base_ptr_of_batch_ |
◆ d0s_grid_desc_m_n_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| D0sGridDesc_M_N ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::d0s_grid_desc_m_n_ |
◆ d1s_grid_desc_m_n_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| D1sGridDesc_M_N ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::d1s_grid_desc_m_n_ |
◆ e1_grid_desc_m_n_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| E1GridDesc_M_N ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::e1_grid_desc_m_n_ |
◆ p_a0_grid_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| const A0DataType* ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::p_a0_grid_ |
◆ p_b0_grid_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| const B0DataType* ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::p_b0_grid_ |
◆ p_b1_grid_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| const B1DataType* ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::p_b1_grid_ |
◆ p_d0s_grid_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| GridwiseGemm64::D0sGridPointer ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::p_d0s_grid_ |
◆ p_d1s_grid_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| GridwiseGemm64::D1sGridPointer ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::p_d1s_grid_ |
◆ p_e1_grid_
template<typename A0Layout, typename B0Layout, typename D0sLayout, typename B1Layout, typename D1sLayout, typename E1Layout, typename A0DataType, typename B0DataType, typename Acc0DataType, typename D0sDataType, typename B1DataType, typename Acc1DataType, typename C1ShuffleDataType, typename D1sDataType, typename E1DataType, typename A0ElementwiseOperation, typename B0ElementwiseOperation, typename CDE0ElementwiseOperation, typename B1ElementwiseOperation, typename CDE1ElementwiseOperation, bool PadGemm0M, bool PadGemm0N, bool PadGemm0K, bool PadGemm1N, bool PadGemm1K, index_t NumGemm0KPrefetchStage, index_t BlockSize, index_t Gemm0MPerBlock, index_t Gemm0NPerBlock, index_t Gemm0KPerBlock, index_t Gemm1NPerBlock, index_t Gemm1KPerBlock, index_t A0K1, index_t B0K1, index_t B1K1, index_t Gemm0MPerXdl, index_t Gemm0NPerXdl, index_t Gemm0MXdlPerWave, index_t Gemm0NXdlPerWave, index_t Gemm1NXdlPerWave, typename A0BlockTransferThreadClusterLengths_AK0_M_AK1, typename A0BlockTransferThreadClusterArrangeOrder, typename A0BlockTransferSrcAccessOrder, index_t A0BlockTransferSrcVectorDim, index_t A0BlockTransferSrcScalarPerVector, index_t A0BlockTransferDstScalarPerVector_AK1, bool A0BlockLdsExtraM, typename B0BlockTransferThreadClusterLengths_BK0_N_BK1, typename B0BlockTransferThreadClusterArrangeOrder, typename B0BlockTransferSrcAccessOrder, index_t B0BlockTransferSrcVectorDim, index_t B0BlockTransferSrcScalarPerVector, index_t B0BlockTransferDstScalarPerVector_BK1, bool B0BlockLdsExtraN, index_t CDE0BlockTransferSrcVectorDim, index_t CDE0BlockTransferSrcScalaerPerVector, typename B1BlockTransferThreadClusterLengths_BK0_N_BK1, typename B1BlockTransferThreadClusterArrangeOrder, typename B1BlockTransferSrcAccessOrder, index_t B1BlockTransferSrcVectorDim, index_t B1BlockTransferSrcScalarPerVector, index_t B1BlockTransferDstScalarPerVector_BK1, bool B1BlockLdsExtraN, index_t C1ShuffleMXdlPerWavePerShuffle, index_t C1ShuffleGemm0NXdlPerWavePerShuffle, typename 
CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopScheduler LoopSched = LoopScheduler::Default>
| E1DataType* ck::tensor_operation::device::DeviceBatchedGemmMultipleDGemmMultipleD_Xdl_CShuffle< A0Layout, B0Layout, D0sLayout, B1Layout, D1sLayout, E1Layout, A0DataType, B0DataType, Acc0DataType, D0sDataType, B1DataType, Acc1DataType, C1ShuffleDataType, D1sDataType, E1DataType, A0ElementwiseOperation, B0ElementwiseOperation, CDE0ElementwiseOperation, B1ElementwiseOperation, CDE1ElementwiseOperation, PadGemm0M, PadGemm0N, PadGemm0K, PadGemm1N, PadGemm1K, NumGemm0KPrefetchStage, BlockSize, Gemm0MPerBlock, Gemm0NPerBlock, Gemm0KPerBlock, Gemm1NPerBlock, Gemm1KPerBlock, A0K1, B0K1, B1K1, Gemm0MPerXdl, Gemm0NPerXdl, Gemm0MXdlPerWave, Gemm0NXdlPerWave, Gemm1NXdlPerWave, A0BlockTransferThreadClusterLengths_AK0_M_AK1, A0BlockTransferThreadClusterArrangeOrder, A0BlockTransferSrcAccessOrder, A0BlockTransferSrcVectorDim, A0BlockTransferSrcScalarPerVector, A0BlockTransferDstScalarPerVector_AK1, A0BlockLdsExtraM, B0BlockTransferThreadClusterLengths_BK0_N_BK1, B0BlockTransferThreadClusterArrangeOrder, B0BlockTransferSrcAccessOrder, B0BlockTransferSrcVectorDim, B0BlockTransferSrcScalarPerVector, B0BlockTransferDstScalarPerVector_BK1, B0BlockLdsExtraN, CDE0BlockTransferSrcVectorDim, CDE0BlockTransferSrcScalaerPerVector, B1BlockTransferThreadClusterLengths_BK0_N_BK1, B1BlockTransferThreadClusterArrangeOrder, B1BlockTransferSrcAccessOrder, B1BlockTransferSrcVectorDim, B1BlockTransferSrcScalarPerVector, B1BlockTransferDstScalarPerVector_BK1, B1BlockLdsExtraN, C1ShuffleMXdlPerWavePerShuffle, C1ShuffleGemm0NXdlPerWavePerShuffle, CDE1ShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDE1ShuffleBlockTransferScalarPerVector_NPerBlock, LoopSched >::Argument::p_e1_grid_ |
The documentation for this struct was generated from the following file: