gemm_aquant_pipeline_ag_bg_cr_mem.hpp Source File
gemm_aquant_pipeline_ag_bg_cr_mem.hpp
Go to the documentation of this file.
119 static constexpr index_t GetVectorSizeA() { return Policy::template GetVectorSizeA<Problem>(); }
120 static constexpr index_t GetVectorSizeB() { return Policy::template GetVectorSizeB<Problem>(); }
121 static constexpr index_t GetVectorSizeC() { return Policy::template GetVectorSizeC<Problem>(); }
Definition tile/core/algorithm/cluster_descriptor.hpp:13
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition type_traits.hpp:21
constant< b > bool_constant
Definition tile/core/numeric/integral_constant.hpp:43
CK_TILE_DEVICE void tile_elementwise_inout(const InOutElementFunc &inout_element_func, InOutDstrTensors &... inout_dstr_tensors)
Definition tile_elementwise.hpp:23
auto concat(const Ts &... xs) -> std::enable_if_t<!AllConvertibleToStringView< Ts... >, std::string >
Definition concat.hpp:43
CK_TILE_DEVICE void transpose_tile2d(OutTensor &out, const InTensor &in)
Definition transpose_tile.hpp:195
CK_TILE_HOST_DEVICE constexpr auto make_static_distributed_tensor(const StaticTileDistribution &)
Definition static_distributed_tensor.hpp:142
CK_TILE_HOST_DEVICE constexpr auto make_static_tile_distribution(StaticTileDistributionEncoding_)
Definition tile_distribution.hpp:480
typename impl::tuple_array_impl< T, N >::type tuple_array
Definition tile/core/container/tuple.hpp:28
GemmPipelineScheduler
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:14
@ Interwave
Definition gemm_pipeline_ag_bg_cr_scheduler.hpp:17
CK_TILE_HOST_DEVICE constexpr details::return_type< D, Ts... > make_array(Ts &&... ts)
Definition tile/core/container/array.hpp:242
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const AElementFunction &a_element_func, const BDramBlockWindowTmp &b_dram_block_window_tmp, const BElementFunction &b_element_func, const AQDramBlockWindowTmp &aq_dram_block_window_tmp, index_t m, index_t num_loop, void *p_smem) const
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:233
PipelineImplBase Base
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:224
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:218
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:80
remove_cvref_t< typename Problem::BDataType > BDataType
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:86
remove_cvref_t< typename Problem::BlockGemmShape > BlockGemmShape
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:88
number< 1 > I1
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:95
remove_cvref_t< decltype(Policy::template GetBlockGemm< Problem >())> BlockGemm
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:111
remove_cvref_t< typename Problem::AQDataType > AQDataType
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:85
remove_cvref_t< typename Problem::ALayout > ALayout
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:106
remove_cvref_t< typename Problem::CDataType > CDataType
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:87
static constexpr bool kPadN
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:131
remove_cvref_t< typename Problem::ADataType > ADataType
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:84
static constexpr index_t GetSmemPackB()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:128
remove_cvref_t< typename Problem::BLayout > BLayout
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:108
remove_cvref_t< typename Problem::CLayout > CLayout
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:109
remove_cvref_t< typename Problem::AQLayout > AQLayout
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:107
remove_cvref_t< typename Problem::QuantGroupSize > QuantGroupSize
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:89
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_mem.hpp:46
static CK_TILE_HOST std::string Print()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:163
CK_TILE_DEVICE auto operator()(const ADramBlockWindowTmp &a_dram_block_window_tmp, const BDramBlockWindowTmp &b_dram_block_window_tmp, const AQDramBlockWindowTmp &aq_dram_block_window_tmp, index_t m, index_t num_loop, void *p_smem) const
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:463
static constexpr index_t MPerBlock
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:114
static constexpr index_t GetVectorSizeC()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:121
static constexpr bool HasHotLoop
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:137
BaseGemmPipelineAgBgCrMem< Problem > Base
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:81
GemmAQuantPipelineAgBgCrImplBase< Problem, Policy > PipelineImplBase
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:82
static constexpr auto Scheduler
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:139
static constexpr index_t NPerBlock
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:115
static CK_TILE_HOST const std::string GetName()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:143
static constexpr index_t BlockSize
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:113
static constexpr index_t AQPackedSize
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:103
static constexpr index_t GetVectorSizeB()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:120
static constexpr index_t GetSmemPackA()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:127
static constexpr index_t KPerBlockAQ
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:117
static constexpr bool DoubleSmemBuffer
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:134
number< 0 > I0
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:94
static constexpr bool PreshuffleQuant
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:135
static constexpr index_t GetVectorSizeA()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:119
number< 2 > I2
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:96
static constexpr index_t GetVectorSizeAQ()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:122
static constexpr index_t KPerBlock
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:116
static constexpr bool kPadM
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:130
static constexpr index_t APackedSize
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:98
static constexpr index_t BPackedSize
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:100
static CK_TILE_HOST_DEVICE constexpr index_t GetSmemSize()
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:158
static constexpr bool kPadK
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:132
static constexpr auto TailNum
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:138
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:20
static CK_TILE_HOST_DEVICE auto TailHandler(const RunFunction &run_func, bool has_hot_loop, TailNumber tail_number)
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:34
static CK_TILE_HOST_DEVICE constexpr TailNumber GetBlockLoopTailNum(index_t num_loop)
Definition gemm_aquant_pipeline_ag_bg_cr_mem.hpp:21
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_comp_v3.hpp:19
Definition gemm_pipeline_ag_bg_cr_mem.hpp:19
static constexpr index_t PrefetchStages
Definition gemm_pipeline_ag_bg_cr_mem.hpp:46
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:14
static constexpr index_t NPerBlock
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:26
static constexpr index_t KPerBlock
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:27
typename Base::BDataType BDataType
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:18
CK_TILE_DEVICE constexpr auto GetAQDramLoadWindow(const AQDramBlockWindowTmp &aq_dram_block_window_tmp) const
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:37
static constexpr index_t KPerBlockAQ
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:29
static constexpr index_t MPerBlock
Definition gemm_aquant_pipeline_ag_bg_cr_base.hpp:25
CK_TILE_DEVICE constexpr auto GetBWindows(const BDramBlockWindowTmp &b_dram_block_window_tmp, const BLdsTensorView &b_lds_block_view, const BLdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:225
CK_TILE_DEVICE auto GetABLdsTensorViews(void *p_smem) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:83
CK_TILE_DEVICE void LocalPrefill(DstTileWindow &lds_tile_window, const SrcBlockTile &src_block_tile, const ElementFunction &element_func) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:57
CK_TILE_DEVICE constexpr auto GetAWindows(const ADramBlockWindowTmp &a_dram_block_window_tmp, const ALdsTensorView &a_lds_block_view, const ALdsLoadTileDistr &, const array< index_t, 2 > &offset={0, 0}) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:190
CK_TILE_DEVICE void GlobalPrefetch(DstBlockTile &dst_block_tile, SrcTileWindow &dram_tile_window, const DramTileWindowStep &dram_tile_window_step) const
Definition gemm_pipeline_ag_bg_cr_base.hpp:39
Definition tile/core/numeric/integral_constant.hpp:30
Definition tile/core/numeric/numeric.hpp:81
Definition tile/core/utility/functional.hpp:43