gemm_pipeline_ag_bg_cr_mem.hpp Source File
gemm_pipeline_ag_bg_cr_mem.hpp
Go to the documentation of this file.
196 static constexpr index_t GetVectorSizeC() { return Policy::template GetVectorSizeC<Problem>(); }
#define CHECK_TAIL_NUMBER(TAIL_NUMBER, PREFETCH_VALUE)
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access >={}, bool_constant< oob_conditional_check >={})
Load tile with elementwise function.
Definition load_tile.hpp:41
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition type_traits.hpp:67
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
ck_tile::element_wise::PassThrough PassThrough
Definition grouped_convolution_utils.hpp:47
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_DEVICE void transpose_tile2d(OutTensor &out, const InTensor &in)
Definition transpose_tile.hpp:195
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_HOST_DEVICE constexpr auto integer_divide_ceil(X x, Y y)
Definition tile/core/numeric/math.hpp:149
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition null_tile_window.hpp:95
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
typename impl::tuple_array_impl< T, N >::type tuple_array
Definition tile/core/container/tuple.hpp:28
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
GemmPipelineScheduler::Intrawave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:16
GemmPipelineScheduler::Interwave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:17
CK_TILE_HOST_DEVICE constexpr details::return_type< D, Ts... > make_array(Ts &&... ts)
Definition tile/core/container/array.hpp:242
CK_TILE_HOST_DEVICE constexpr auto make_tuple(Xs &&... xs)
Definition tile/core/container/tuple.hpp:360
Definition gemm_pipeline_ag_bg_cr_mem.hpp:19
static constexpr index_t WgpPerCU
Definition gemm_pipeline_ag_bg_cr_mem.hpp:39
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_mem.hpp:32
static constexpr bool UsePersistentKernel
Definition gemm_pipeline_ag_bg_cr_mem.hpp:53
remove_cvref_t< typename Problem::BDataType > BDataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:21
static constexpr index_t BPackedSize
Definition gemm_pipeline_ag_bg_cr_mem.hpp:26
static CK_TILE_HOST_DEVICE constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
Definition gemm_pipeline_ag_bg_cr_mem.hpp:60
static constexpr index_t BlockSize
Definition gemm_pipeline_ag_bg_cr_mem.hpp:31
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_mem.hpp:46
static constexpr index_t GlobalBufferNum
Definition gemm_pipeline_ag_bg_cr_mem.hpp:52
static CK_TILE_HOST_DEVICE constexpr auto TransposeC()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:29
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number)
Definition gemm_pipeline_ag_bg_cr_mem.hpp:98
static constexpr index_t FullMemBandPrefetchStages
Definition gemm_pipeline_ag_bg_cr_mem.hpp:41
static constexpr index_t LocalPrefillStages
Definition gemm_pipeline_ag_bg_cr_mem.hpp:51
remove_cvref_t< typename Problem::ADataType > ADataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:20
static constexpr index_t MinMemInFlyBytes
Definition gemm_pipeline_ag_bg_cr_mem.hpp:37
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_mem.hpp:34
static constexpr index_t APackedSize
Definition gemm_pipeline_ag_bg_cr_mem.hpp:24
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition gemm_pipeline_ag_bg_cr_mem.hpp:22
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_mem.hpp:33
static CK_TILE_HOST_DEVICE constexpr bool BlockHasHotloop(index_t num_loop)
Definition gemm_pipeline_ag_bg_cr_mem.hpp:55
Definition gemm_pipeline_ag_bg_cr_base.hpp:13
CK_TILE_DEVICE constexpr auto GetBWindows(const BDramBlockWindowTmp &b_dram_block_window_tmp, const BLdsTensorView &b_lds_block_view, const BLdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:225
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataType > > BDataType
Definition gemm_pipeline_ag_bg_cr_base.hpp:22
CK_TILE_DEVICE auto GetABLdsTensorViews(void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:83
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:26
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:25
CK_TILE_DEVICE void LocalPrefill(DstTileWindow &lds_tile_window, const SrcBlockTile &src_block_tile, const ElementFunction &element_func) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:57
CK_TILE_DEVICE constexpr auto GetAWindows(const ADramBlockWindowTmp &a_dram_block_window_tmp, const ALdsTensorView &a_lds_block_view, const ALdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:190
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_base.hpp:27
PipelineImplBase Base
Definition gemm_pipeline_ag_bg_cr_mem.hpp:554
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:565
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:253
PipelineImplBase Base
Definition gemm_pipeline_ag_bg_cr_mem.hpp:242
Definition gemm_pipeline_ag_bg_cr_mem.hpp:236
Definition gemm_pipeline_ag_bg_cr_mem.hpp:154
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition gemm_pipeline_ag_bg_cr_mem.hpp:164
static constexpr index_t GetVectorSizeC()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:196
static constexpr index_t GetVectorSizeB()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:192
static constexpr index_t MPerBlock
Definition gemm_pipeline_ag_bg_cr_mem.hpp:182
remove_cvref_t< typename Problem::BsDataTypeTuple > BsDataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:159
static constexpr index_t Preshuffle
Definition gemm_pipeline_ag_bg_cr_mem.hpp:207
remove_cvref_t< std::tuple_element_t< 0, BsDataType > > BDataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:174
static constexpr bool DoubleSmemBuffer
Definition gemm_pipeline_ag_bg_cr_mem.hpp:205
BaseGemmPipelineAgBgCrMem< Problem > Base
Definition gemm_pipeline_ag_bg_cr_mem.hpp:155
static constexpr index_t GetSmemPackB()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:199
remove_cvref_t< typename Problem::BElementWise > BElementWise
Definition gemm_pipeline_ag_bg_cr_mem.hpp:163
static constexpr auto TailNum
Definition gemm_pipeline_ag_bg_cr_mem.hpp:211
GemmPipelineAgBgCrImplBase< Problem, Policy > PipelineImplBase
Definition gemm_pipeline_ag_bg_cr_mem.hpp:156
static constexpr index_t GetSmemPackA()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:198
static constexpr index_t NPerBlock
Definition gemm_pipeline_ag_bg_cr_mem.hpp:183
static constexpr index_t KPerBlock
Definition gemm_pipeline_ag_bg_cr_mem.hpp:184
static constexpr index_t GetVectorSizeA()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:187
static constexpr index_t NumWaveGroups
Definition gemm_pipeline_ag_bg_cr_mem.hpp:206
static constexpr auto is_b_load_tr_v
Definition gemm_pipeline_ag_bg_cr_mem.hpp:215
remove_cvref_t< typename Problem::AsLayoutTuple > AsLayout
Definition gemm_pipeline_ag_bg_cr_mem.hpp:166
remove_cvref_t< std::tuple_element_t< 0, BsLayout > > BLayout
Definition gemm_pipeline_ag_bg_cr_mem.hpp:171
remove_cvref_t< typename Problem::CLayout > CLayout
Definition gemm_pipeline_ag_bg_cr_mem.hpp:168
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_mem.hpp:46
remove_cvref_t< std::tuple_element_t< 0, AsLayout > > ALayout
Definition gemm_pipeline_ag_bg_cr_mem.hpp:170
static constexpr auto Scheduler
Definition gemm_pipeline_ag_bg_cr_mem.hpp:212
remove_cvref_t< decltype(Policy::template GetBlockGemm< Problem >())> BlockGemm
Definition gemm_pipeline_ag_bg_cr_mem.hpp:176
remove_cvref_t< std::tuple_element_t< 0, AsDataType > > ADataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:173
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BsDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:876
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:897
CK_TILE_DEVICE auto operator()(const AsDramBlockWindowTmp &a_dram_block_window_tmp, const BsDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:924
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:945
static constexpr auto is_a_load_tr_v
Definition gemm_pipeline_ag_bg_cr_mem.hpp:214
remove_cvref_t< typename Problem::AElementWise > AElementWise
Definition gemm_pipeline_ag_bg_cr_mem.hpp:162
static constexpr bool kPadN
Definition gemm_pipeline_ag_bg_cr_mem.hpp:202
remove_cvref_t< typename Problem::BsLayoutTuple > BsLayout
Definition gemm_pipeline_ag_bg_cr_mem.hpp:167
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:229
static constexpr bool HasHotLoop
Definition gemm_pipeline_ag_bg_cr_mem.hpp:210
static constexpr bool kPadM
Definition gemm_pipeline_ag_bg_cr_mem.hpp:201
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:985
static constexpr bool kPadK
Definition gemm_pipeline_ag_bg_cr_mem.hpp:203
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, index_t num_loop, bool has_hot_loop, TailNumber tail_number, void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_mem.hpp:965
remove_cvref_t< typename Problem::CDataType > CDataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:160
static CK_TILE_HOST const std::string GetName()
Definition gemm_pipeline_ag_bg_cr_mem.hpp:217
remove_cvref_t< typename Problem::AsDataTypeTuple > AsDataType
Definition gemm_pipeline_ag_bg_cr_mem.hpp:158
Definition tile/core/numeric/integral_constant.hpp:30
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/utility/functional.hpp:43