GemmTransKernelArg Struct Reference

GemmTransKernelArg Struct Reference#

Composable Kernel: ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg Struct Reference
ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg Struct Reference

#include <device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp>

Public Member Functions

 GemmTransKernelArg ()=default
 GemmTransKernelArg (GemmKernelArgument &&karg, GroupedGemmBlock2ETileMap &&b2c_map, index_t block_start, index_t block_end)

Public Attributes

GemmKernelArgument karg_
GroupedGemmBlock2ETileMap block_2_ctile_map_
index_t block_start_
index_t block_end_

Constructor & Destructor Documentation

◆ GemmTransKernelArg() [1/2]

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVersion PipelineVer = PipelineVersion::v1, LoopScheduler LoopSched = make_default_loop_scheduler(), typename ComputeDataType = EDataType, enable_if_t< AK1==BK1, bool > = false>
ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg::GemmTransKernelArg ( )
default

◆ GemmTransKernelArg() [2/2]

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVersion PipelineVer = PipelineVersion::v1, LoopScheduler LoopSched = make_default_loop_scheduler(), typename ComputeDataType = EDataType, enable_if_t< AK1==BK1, bool > = false>
ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg::GemmTransKernelArg ( GemmKernelArgument && karg,
GroupedGemmBlock2ETileMap && b2c_map,
index_t block_start,
index_t block_end )
inline

Member Data Documentation

◆ block_2_ctile_map_

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVersion PipelineVer = PipelineVersion::v1, LoopScheduler LoopSched = make_default_loop_scheduler(), typename ComputeDataType = EDataType, enable_if_t< AK1==BK1, bool > = false>
GroupedGemmBlock2ETileMap ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg::block_2_ctile_map_

◆ block_end_

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVersion PipelineVer = PipelineVersion::v1, LoopScheduler LoopSched = make_default_loop_scheduler(), typename ComputeDataType = EDataType, enable_if_t< AK1==BK1, bool > = false>
index_t ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg::block_end_

◆ block_start_

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVersion PipelineVer = PipelineVersion::v1, LoopScheduler LoopSched = make_default_loop_scheduler(), typename ComputeDataType = EDataType, enable_if_t< AK1==BK1, bool > = false>
index_t ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg::block_start_

◆ karg_

template<typename ALayout, typename BLayout, typename DsLayout, typename ELayout, typename ADataType, typename BDataType, typename AccDataType, typename CShuffleDataType, typename DsDataType, typename EDataType, typename AElementwiseOperation, typename BElementwiseOperation, typename CDEElementwiseOperation, GemmSpecialization GemmSpec, ck::index_t NumGemmKPrefetchStage, ck::index_t BlockSize, ck::index_t MPerBlock, ck::index_t NPerBlock, ck::index_t KPerBlock, ck::index_t AK1, ck::index_t BK1, ck::index_t MPerXDL, ck::index_t NPerXDL, ck::index_t MXdlPerWave, ck::index_t NXdlPerWave, typename ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, typename ABlockTransferThreadClusterArrangeOrder, typename ABlockTransferSrcAccessOrder, index_t ABlockTransferSrcVectorDim, index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferDstScalarPerVector_AK1, index_t ABlockLdsExtraM, typename BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, typename BBlockTransferThreadClusterArrangeOrder, typename BBlockTransferSrcAccessOrder, index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferDstScalarPerVector_BK1, index_t BBlockLdsExtraN, index_t CShuffleMXdlPerWavePerShuffle, index_t CShuffleNXdlPerWavePerShuffle, typename CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, index_t CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVersion PipelineVer = PipelineVersion::v1, LoopScheduler LoopSched = make_default_loop_scheduler(), typename ComputeDataType = EDataType, enable_if_t< AK1==BK1, bool > = false>
GemmKernelArgument ck::tensor_operation::device::DeviceGroupedGemmMultipleDSplitKXdlCShuffleTwoStage< ALayout, BLayout, DsLayout, ELayout, ADataType, BDataType, AccDataType, CShuffleDataType, DsDataType, EDataType, AElementwiseOperation, BElementwiseOperation, CDEElementwiseOperation, GemmSpec, NumGemmKPrefetchStage, BlockSize, MPerBlock, NPerBlock, KPerBlock, AK1, BK1, MPerXDL, NPerXDL, MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_KBatch_AK0_M_AK1, ABlockTransferThreadClusterArrangeOrder, ABlockTransferSrcAccessOrder, ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_AK1, ABlockLdsExtraM, BBlockTransferThreadClusterLengths_KBatch_BK0_N_BK1, BBlockTransferThreadClusterArrangeOrder, BBlockTransferSrcAccessOrder, BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_BK1, BBlockLdsExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, CDEShuffleBlockTransferScalarPerVector_NPerBlock, PipelineVer, LoopSched, ComputeDataType, >::GemmTransKernelArg::karg_

The documentation for this struct was generated from the following file: