PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation > Struct Template Reference

PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation > Struct Template Reference

Composable Kernel: ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation > Struct Template Reference
ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation > Struct Template Reference

#include <reduction_functions_blockwise.hpp>

Static Public Member Functions

template<typename BufferType>
static __device__ void Reduce (BufferType &work_buffer, AccDataType &in_out_value)

Static Public Attributes

static constexpr auto BufferLength_M = ThreadClusterLengths_M_K::At(0)
static constexpr auto BufferLength_K = ThreadClusterLengths_M_K::At(1)
static constexpr auto block_buf_desc_m_k
static constexpr auto thread_cluster_desc = ThreadClusterDesc{}

Member Function Documentation

◆ Reduce()

template<typename AccDataType, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterDesc, typename OpReduce, bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>
template<typename BufferType>
__device__ void ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation >::Reduce ( BufferType & work_buffer,
AccDataType & in_out_value )
inline static

Member Data Documentation

◆ block_buf_desc_m_k

template<typename AccDataType, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterDesc, typename OpReduce, bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>
auto ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation >::block_buf_desc_m_k
static constexpr
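Initial value:
= make_naive_tensor_descriptor_packed(
      make_tuple(Number<BufferLength_M>{}, Number<BufferLength_K>{}))

Symbols referenced by the initial value:
- Number: integral_constant< index_t, N > Number (Definition number.hpp:12)
- make_naive_tensor_descriptor_packed: __host__ __device__ constexpr auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths) (Definition tensor_descriptor_helper.hpp:101)
- make_tuple: __host__ __device__ constexpr auto make_tuple(Xs &&... xs) (Definition utility/tuple.hpp:211)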

◆ BufferLength_K

template<typename AccDataType, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterDesc, typename OpReduce, bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>
auto ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation >::BufferLength_K = ThreadClusterLengths_M_K::At(1)
static constexpr

◆ BufferLength_M

template<typename AccDataType, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterDesc, typename OpReduce, bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>
auto ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation >::BufferLength_M = ThreadClusterLengths_M_K::At(0)
static constexpr

◆ thread_cluster_desc

template<typename AccDataType, index_t BlockSize, typename ThreadClusterLengths_M_K, typename ThreadClusterDesc, typename OpReduce, bool PropagateNan, typename Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>>
auto ck::PartitionedBlockwiseReduction_v2< AccDataType, BlockSize, ThreadClusterLengths_M_K, ThreadClusterDesc, OpReduce, PropagateNan, Accumulation >::thread_cluster_desc = ThreadClusterDesc{}
static constexpr

The documentation for this struct was generated from the following file: