scaled_type_convert.hpp Source File

scaled_type_convert.hpp Source File#

Composable Kernel: scaled_type_convert.hpp Source File
scaled_type_convert.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: MIT
2// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
3
4#pragma once
5
8
9#ifdef CK_USE_NATIVE_MX_SUPPORT
10#define CK_USE_NATIVE_MX_SUPPORT 1
11#else
12#define CK_USE_NATIVE_MX_SUPPORT 0
13#endif
14
15namespace ck {
16
17// Declare a template function for scaled conversion
18template <typename Y, typename X>
19#if CK_USE_OCP_FP8
20__host__ __device__ constexpr Y scaled_type_convert(e8m0_bexp_t scale, X x);
21#else
22__host__ constexpr Y scaled_type_convert(e8m0_bexp_t scale, X x);
23#endif
24
25// convert f8_ocp_t to fp32
26template <>
27#if CK_USE_OCP_FP8
28inline __host__ __device__ float scaled_type_convert<float, f8_ocp_t>(e8m0_bexp_t scale, f8_ocp_t x)
29#else
31#endif
32{
33
34#if CK_MX_FP8_CVT_FAST_PATH
35 return fp8_impl::cast_to_f32_from_f8_scaled<f8_ocp_t::default_interpret>(
36 type_convert<float>(scale), x.data);
37#else
39#endif
40}
41
42// convert bf8_ocp_t to fp32
43template <>
44#if CK_USE_OCP_FP8
45inline __host__ __device__ float scaled_type_convert<float, bf8_ocp_t>(e8m0_bexp_t scale,
46 bf8_ocp_t x)
47#else
49#endif
50{
51
52#if CK_MX_FP8_CVT_FAST_PATH
53 return fp8_impl::cast_to_f32_from_f8_scaled<bf8_ocp_t::default_interpret>(
54 type_convert<float>(scale), x.data);
55#else
57#endif
58}
59
60// convert 2 x f8_ocp_t to 2 x fp32
61template <>
62#if CK_USE_OCP_FP8
63inline __host__ __device__ float2_t scaled_type_convert<float2_t, f8x2_ocp_t>(e8m0_bexp_t scale,
64 f8x2_ocp_t x)
65#else
67#endif
68{
69#if CK_MX_FP8_CVT_FAST_PATH
70 return fp8_impl::cast_to_f32_from_f8_scaled<f8_ocp_t::default_interpret>(
72#else
73 return float2_t{scaled_type_convert<float>(scale, x.AsType<f8_ocp_t>()[Number<0>{}]),
74 scaled_type_convert<float>(scale, x.AsType<f8_ocp_t>()[Number<1>{}])};
75#endif
76}
77
78// convert 2 x bf8_ocp_t to 2 x fp32
79template <>
80#if CK_USE_OCP_FP8
81inline __host__ __device__ float2_t scaled_type_convert<float2_t, bf8x2_ocp_t>(e8m0_bexp_t scale,
83#else
86#endif
87{
88#if CK_MX_FP8_CVT_FAST_PATH
89 return fp8_impl::cast_to_f32_from_f8_scaled<bf8_ocp_t::default_interpret>(
91#else
92 return float2_t{scaled_type_convert<float>(scale, x.AsType<bf8_ocp_t>()[Number<0>{}]),
93 scaled_type_convert<float>(scale, x.AsType<bf8_ocp_t>()[Number<1>{}])};
94#endif
95}
96
97// convert 16 x f8_ocp_t to 16 x fp32
98// @note Host version gives compilation error. Requires extra compiler options.
99template <>
100#if CK_USE_OCP_FP8
101inline __host__ __device__ float16_t scaled_type_convert<float16_t, f8x16_ocp_t>(e8m0_bexp_t scale,
102 f8x16_ocp_t x)
103#else
105 f8x16_ocp_t x)
106#endif
107{
108 union
109 {
110 f8x16_ocp_t f8_1x16;
111 f8x2_ocp_t f8_2x8[8];
112 } in{x};
113 union
114 {
115 float16_t float_1x16;
116 float2_t float_2x8[8];
117 } out{};
118
119 ck::static_for<0, 8, 1>{}([&](auto i) {
120 out.float_2x8[i] = scaled_type_convert<float2_t, f8x2_ocp_t>(scale, in.f8_2x8[i]);
121 });
122
123 return out.float_1x16;
124}
125
126// convert 16 x bf8_ocp_t to 16 x fp32
127// @note Host version gives compilation error. Requires extra compiler options.
128template <>
129#if CK_USE_OCP_FP8
130inline __host__ __device__ float16_t scaled_type_convert<float16_t, bf8x16_ocp_t>(e8m0_bexp_t scale,
131 bf8x16_ocp_t x)
132#else
134 bf8x16_ocp_t x)
135#endif
136{
137 union
138 {
139 bf8x16_ocp_t bf8_1x16;
140 bf8x2_ocp_t bf8_2x8[8];
141 } in{x};
142 union
143 {
144 float16_t float_1x16;
145 float2_t float_2x8[8];
146 } out{};
147
148 ck::static_for<0, 8, 1>{}([&](auto i) {
149 out.float_2x8[i] = scaled_type_convert<float2_t, bf8x2_ocp_t>(scale, in.bf8_2x8[i]);
150 });
151
152 return out.float_1x16;
153}
154
155// convert 32 x f8_ocp_t to 32 x fp32
156// @note Host version gives compilation error. Requires extra compiler options.
157template <>
158#if CK_USE_OCP_FP8
159inline __host__ __device__ float32_t scaled_type_convert<float32_t, f8x32_ocp_t>(e8m0_bexp_t scale,
160 f8x32_ocp_t x)
161#else
163 f8x32_ocp_t x)
164#endif
165{
166 union
167 {
168 f8x32_ocp_t f8_1x32;
169 f8x16_ocp_t f8_16x2[2];
170 } in{x};
171 union
172 {
173 float32_t float_1x32;
174 float16_t float_16x2[2];
175 } out{};
176
177 ck::static_for<0, 2, 1>{}([&](auto i) {
178 out.float_16x2[i] = scaled_type_convert<float16_t, f8x16_ocp_t>(scale, in.f8_16x2[i]);
179 });
180
181 return out.float_1x32;
182}
183
184// convert 32 x bf8_ocp_t to 32 x fp32
185// @note Host version gives compilation error. Requires extra compiler options.
186template <>
187#if CK_USE_OCP_FP8
188inline __host__ __device__ float32_t scaled_type_convert<float32_t, bf8x32_ocp_t>(e8m0_bexp_t scale,
189 bf8x32_ocp_t x)
190#else
192 bf8x32_ocp_t x)
193#endif
194{
195 union
196 {
197 bf8x32_ocp_t bf8_1x32;
198 bf8x16_ocp_t bf8_16x2[2];
199 } in{x};
200 union
201 {
202 float32_t float_1x32;
203 float16_t float_16x2[2];
204 } out{};
205
206 ck::static_for<0, 2, 1>{}([&](auto i) {
207 out.float_16x2[i] = scaled_type_convert<float16_t, bf8x16_ocp_t>(scale, in.bf8_16x2[i]);
208 });
209
210 return out.float_1x32;
211}
212
213// convert fp32 to fp8
214template <>
215#if CK_USE_OCP_FP8
216inline __host__ __device__ f8_ocp_t scaled_type_convert<f8_ocp_t, float>(e8m0_bexp_t scale, float x)
217#else
219#endif
220{
221#if CK_USE_SR_F8_CONVERSION
223#else
225#endif
226}
227
228// convert fp32 to bf8
229template <>
230#if CK_USE_OCP_FP8
231inline __host__ __device__ bf8_ocp_t scaled_type_convert<bf8_ocp_t, float>(e8m0_bexp_t scale,
232 float x)
233#else
235#endif
236{
237#if CK_USE_SR_F8_CONVERSION
239#else
241#endif
242}
243
244// convert fp32x2 to fp8x2
245template <>
246#if CK_USE_OCP_FP8
247inline __host__ __device__ f8x2_ocp_t scaled_type_convert<f8x2_ocp_t, float2_t>(e8m0_bexp_t scale,
248 float2_t x)
249#else
251#endif
252{
253#if CK_USE_SR_F8_CONVERSION
255#else
257#endif
258}
259// convert fp32x2 to bf8x2
260template <>
261#if CK_USE_OCP_FP8
262inline __host__ __device__ bf8x2_ocp_t scaled_type_convert<bf8x2_ocp_t, float2_t>(e8m0_bexp_t scale,
263 float2_t x)
264#else
266 float2_t x)
267#endif
268{
269#if CK_USE_SR_F8_CONVERSION
271#else
273#endif
274}
275
276// convert fp32x16 to fp8x16
277// @note Host version gives compilation error. Requires extra compiler options.
278template <>
279#if CK_USE_OCP_FP8
280inline __host__ __device__ f8x16_ocp_t
282#else
284 float16_t x)
285#endif
286{
287#if CK_USE_SR_F8_CONVERSION
289#else
291#endif
292}
293
294// convert fp32x16 to bf8x16
295// @note Host version gives compilation error. Requires extra compiler options.
296template <>
297#if CK_USE_OCP_FP8
298inline __host__ __device__ bf8x16_ocp_t
300#else
302 float16_t x)
303#endif
304{
305#if CK_USE_SR_F8_CONVERSION
307#else
309#endif
310}
311
312// convert fp32x32 to fp8x32
313// @note Host version gives compilation error. Requires extra compiler options.
314template <>
315#if CK_USE_OCP_FP8
316inline __host__ __device__ f8x32_ocp_t
318#else
320 float32_t x)
321#endif
322{
323#if CK_USE_SR_F8_CONVERSION
325#else
327#endif
328}
329
330// convert fp32x32 to bf8x32
331// @note Host version gives compilation error. Requires extra compiler options.
332template <>
333#if CK_USE_OCP_FP8
334inline __host__ __device__ bf8x32_ocp_t
336#else
338 float32_t x)
339#endif
340{
341#if CK_USE_SR_F8_CONVERSION
343#else
345#endif
346}
347
348// activate for architectures with native MX support
349#if CK_USE_NATIVE_MX_SUPPORT
350// convert fp4 to fp32
351template <>
352inline __host__ __device__ float scaled_type_convert<float, f4_t>(e8m0_bexp_t scale, f4_t x)
353{
354#if defined(__gfx950__)
355 union
356 {
357 float float_array[2];
358 float2_t float2_array;
359 } float_values{};
360 float_values.float2_array =
361 __builtin_amdgcn_cvt_scalef32_pk_f32_fp4(x, type_convert<float>(scale), 0);
362 return float_values.float_array[0];
363#else
364 return utils::to_float<f4_t>(scale, x);
365#endif
366}
367
368// convert vector of 2 fp4 to vector of 2 fp32
369template <>
370inline __host__ __device__ float2_t scaled_type_convert<float2_t, f4x2_t>(e8m0_bexp_t scale,
371 f4x2_t x)
372{
373#if defined(__gfx950__)
374 union
375 {
376 uint32_t bitwise;
377 f4x2_t f4x2_array[4];
378 } value{};
379 value.f4x2_array[0] = x;
380 return __builtin_amdgcn_cvt_scalef32_pk_f32_fp4(value.bitwise, type_convert<float>(scale), 0);
381#else
383 scale, x.template AsType<f4x2_pk_t>()[Number<0>{}].unpack<>(Number<0>{})),
385 scale, x.template AsType<f4x2_pk_t>()[Number<0>{}].unpack<>(Number<1>{}))};
386 return ret;
387#endif
388}
389
390// convert vector of 32 fp4 to vector of 32 fp32
391template <>
392inline __host__ __device__ float32_t scaled_type_convert<float32_t, f4x32_t>(e8m0_bexp_t scale,
393 f4x32_t x)
394{
395#if defined(__gfx950__)
396 union
397 {
398 f4x32_t f4x32_array;
399 f4x2_t fp4x2[16];
400 } value{x};
401 float2_t op;
402 float32_t ret;
403 float f_scale = type_convert<float>(scale);
404
405 ck::static_for<0, 32 / 2, 1>{}([&](auto idx) {
406 op = __builtin_amdgcn_cvt_scalef32_pk_f32_fp4(value.fp4x2[idx], f_scale, 0);
407 ret[2 * idx] = op[0];
408 ret[2 * idx + 1] = op[1];
409 });
410
411 return ret;
412#else
413 union
414 {
415 float32_t float32_array;
416 float float_array[32];
417 } float_values{};
418 union
419 {
420 __uint128_t bitwise;
421 f4x2_t f4x2_array[16];
422 f4x32_t f4x32_array;
423 } f4_values{bit_cast<__uint128_t>(x)};
424
425 ck::static_for<0, 32 / 2, 1>{}([&](auto idx) {
426 float_values.float_array[2 * idx] = utils::to_float<f4_t>(
427 scale,
428 f4_values.f4x2_array[idx].template AsType<f4x2_pk_t>()[Number<0>{}].template unpack<>(
429 Number<0>{}));
430
431 float_values.float_array[2 * idx + 1] = utils::to_float<f4_t>(
432 scale,
433 f4_values.f4x2_array[idx].template AsType<f4x2_pk_t>()[Number<0>{}].template unpack<>(
434 Number<1>{}));
435 });
436
437 return float_values.float32_array;
438#endif
439}
440
441// convert fp32 to fp4
442template <>
443inline __host__ __device__ f4_t scaled_type_convert<f4_t, float>(e8m0_bexp_t scale, float x)
444{
445#if CK_USE_SR_F4_CONVERSION
446 return f4_convert_sr(x, type_convert<float>(scale));
447#else
448 return f4_convert_rne(x, type_convert<float>(scale));
449#endif
450}
451
452// convert vector of 2 fp32 to vector of 2 fp4
453template <>
454inline __host__ __device__ f4x2_t scaled_type_convert<f4x2_t, float2_t>(e8m0_bexp_t scale,
455 float2_t x)
456{
457#if CK_USE_SR_F4_CONVERSION
458 return f4_convert_sr(x, type_convert<float>(scale));
459#else
460 return f4_convert_rne(x, type_convert<float>(scale));
461#endif
462}
463
464// convert vector of 32 fp32 to vector of 32 fp4
465template <>
466inline __host__ __device__ f4x32_t scaled_type_convert<f4x32_t, float32_t>(e8m0_bexp_t scale,
467 float32_t x)
468{
469#if CK_USE_SR_F4_CONVERSION
470 return f4_convert_sr(x, type_convert<float>(scale));
471#else
472 return f4_convert_rne(x, type_convert<float>(scale));
473#endif
474}
475
484template <>
485inline __host__ __device__ float scaled_type_convert<float, f6_t>(e8m0_bexp_t scale, f6_t x)
486{
487#if defined(__gfx950__)
488 union
489 {
490 f6x32_t f6_vector;
491 f6_t f6_array[32];
492 } in{x};
493
494 union
495 {
496 float32_t float_vector;
497 float float_array[32];
498 } out{};
499
500 out.float_vector = __builtin_amdgcn_cvt_scalef32_pk32_f32_fp6(
501 in.f6_vector.template AsType<f6x32_t::data_t>()[Number<0>{}], type_convert<float>(scale));
502 return out.float_array[0];
503#else
504 return utils::to_float<f6_t>(scale, x);
505#endif
506}
507
516template <>
517inline __host__ __device__ float32_t scaled_type_convert<float32_t, f6x32_t>(e8m0_bexp_t scale,
518 f6x32_t x)
519{
520#if defined(__gfx950__)
521 return __builtin_amdgcn_cvt_scalef32_pk32_f32_fp6(
522 x.template AsType<f6x32_t::data_t>()[Number<0>{}], type_convert<float>(scale));
523#else
524 union
525 {
526 f6x32_t f6_vector;
527 f6_t f6_array[32];
528 } in{x};
529
530 union
531 {
532 float32_t float_vector;
533 float float_array[32];
534 } out{};
535
536 ck::static_for<0, 32, 1>{}(
537 [&](auto i) { out.float_array[i] = utils::to_float<f6_t>(scale, in.f6_array[i]); });
538
539 return out.float_vector;
540#endif
541}
542
551template <>
552inline __host__ __device__ float scaled_type_convert<float, bf6_t>(e8m0_bexp_t scale, bf6_t x)
553{
554#if defined(__gfx950__)
555 union
556 {
557 bf6x32_t bf6_vector;
558 bf6_t bf6_array[32];
559 } in{x};
560
561 union
562 {
563 float32_t float_vector;
564 float float_array[32];
565 } out{};
566
567 out.float_vector = __builtin_amdgcn_cvt_scalef32_pk32_f32_bf6(
568 in.bf6_vector.template AsType<bf6x32_t::data_t>()[Number<0>{}], type_convert<float>(scale));
569 return out.float_array[0];
570#else
571 return utils::to_float<bf6_t>(scale, x);
572#endif
573}
574
583template <>
584inline __host__ __device__ float32_t scaled_type_convert<float32_t, bf6x32_t>(e8m0_bexp_t scale,
585 bf6x32_t x)
586{
587#if defined(__gfx950__)
588 return __builtin_amdgcn_cvt_scalef32_pk32_f32_bf6(
589 x.template AsType<bf6x32_t::data_t>()[Number<0>{}], type_convert<float>(scale));
590#else
591 union
592 {
593 bf6x32_t bf6_vector;
594 bf6_t bf6_array[32];
595 } in{x};
596
597 union
598 {
599 float32_t float_vector;
600 float float_array[32];
601 } out{};
602
603 ck::static_for<0, 32, 1>{}(
604 [&](auto i) { out.float_array[i] = utils::to_float<bf6_t>(scale, in.bf6_array[i]); });
605
606 return out.float_vector;
607#endif
608}
609
621template <>
622inline __host__ __device__ f6_t scaled_type_convert<f6_t, float>(e8m0_bexp_t scale, float x)
623{
624#if CK_USE_SR_F6_CONVERSION
625 return f6_convert_sr(x, type_convert<float>(scale));
626#else
627 return f6_convert_rne(x, type_convert<float>(scale));
628#endif
629}
630
642template <>
643inline __host__ __device__ f6x32_t scaled_type_convert<f6x32_t, float32_t>(e8m0_bexp_t scale,
644 float32_t x)
645{
646#if CK_USE_SR_F6_CONVERSION
647 return f6_convert_sr(x, type_convert<float>(scale));
648#else
649 return f6_convert_rne(x, type_convert<float>(scale));
650#endif
651}
652
664template <>
665inline __host__ __device__ bf6_t scaled_type_convert<bf6_t, float>(e8m0_bexp_t scale, float x)
666{
667#if CK_USE_SR_F6_CONVERSION
668 return bf6_convert_sr(x, type_convert<float>(scale));
669#else
670 return bf6_convert_rne(x, type_convert<float>(scale));
671#endif
672}
673
685template <>
686inline __host__ __device__ bf6x32_t scaled_type_convert<bf6x32_t, float32_t>(e8m0_bexp_t scale,
687 float32_t x)
688{
689#if CK_USE_SR_F6_CONVERSION
690 return bf6_convert_sr(x, type_convert<float>(scale));
691#else
692 return bf6_convert_rne(x, type_convert<float>(scale));
693#endif
694}
695#endif // #if CK_USE_NATIVE_MX_SUPPORT
696
697} // namespace ck
fp8_storage_t fp8x2_storage_t
Definition amd_ck_fp8.hpp:88
__host__ __device__ float to_float< bf6_t >(e8m0_bexp_t const scale, bf6_t const data)
Converts an bf6_t value to a float based on an e8m0_bexp_t scale factor.
Definition mxf6_utils.hpp:165
__host__ __device__ float to_float< f4_t >(e8m0_bexp_t const scale, f4_t const data)
Definition mxf4_utils.hpp:40
__host__ __device__ float to_float< f6_t >(e8m0_bexp_t const scale, f6_t const data)
Converts an f6_t value to a float based on an e8m0_bexp_t scale factor.
Definition mxf6_utils.hpp:139
Definition ck.hpp:268
typename vector_type< float, 16 >::type float16_t
Definition dtype_vector.hpp:2148
__host__ bf8x32_ocp_t scaled_type_convert< bf8x32_ocp_t, float32_t >(e8m0_bexp_t scale, float32_t x)
Definition scaled_type_convert.hpp:337
__host__ f8_ocp_t scaled_type_convert< f8_ocp_t, float >(e8m0_bexp_t scale, float x)
Definition scaled_type_convert.hpp:218
__host__ __device__ constexpr Y mxf8_convert_rne(X x, float scale)
__host__ float16_t scaled_type_convert< float16_t, bf8x16_ocp_t >(e8m0_bexp_t scale, bf8x16_ocp_t x)
Definition scaled_type_convert.hpp:133
__host__ __device__ f6_t f6_convert_rne(float x, float scale=1.0f)
Converts a float to a 6-bit float type (f6_t) using round-to-nearest-even.
Definition utility/type_convert.hpp:1801
__host__ float scaled_type_convert< float, bf8_ocp_t >(e8m0_bexp_t scale, bf8_ocp_t x)
Definition scaled_type_convert.hpp:48
__host__ __device__ bf6_t bf6_convert_sr(float x, float scale=1.0f)
Converts a float to the 6-bit BF6 type using stochastic rounding.
Definition utility/type_convert.hpp:2204
__host__ float scaled_type_convert< float, f8_ocp_t >(e8m0_bexp_t scale, f8_ocp_t x)
Definition scaled_type_convert.hpp:30
__host__ float16_t scaled_type_convert< float16_t, f8x16_ocp_t >(e8m0_bexp_t scale, f8x16_ocp_t x)
Definition scaled_type_convert.hpp:104
__host__ __device__ bf6_t bf6_convert_rne(float x, float scale=1.0f)
Converts a float to the 6-bit BF6 type using round-to-nearest-even.
Definition utility/type_convert.hpp:2137
typename vector_type< f8_ocp_t, 32 >::type f8x32_ocp_t
Definition dtype_vector.hpp:2204
__host__ __device__ f4_t f4_convert_rne(float x, float scale=1.0f)
Definition utility/type_convert.hpp:1468
__host__ bf8x2_ocp_t scaled_type_convert< bf8x2_ocp_t, float2_t >(e8m0_bexp_t scale, float2_t x)
Definition scaled_type_convert.hpp:265
__host__ bf8x16_ocp_t scaled_type_convert< bf8x16_ocp_t, float16_t >(e8m0_bexp_t scale, float16_t x)
Definition scaled_type_convert.hpp:301
integral_constant< index_t, N > Number
Definition number.hpp:12
typename vector_type< bf8_ocp_t, 32 >::type bf8x32_ocp_t
Definition dtype_vector.hpp:2212
__host__ float32_t scaled_type_convert< float32_t, f8x32_ocp_t >(e8m0_bexp_t scale, f8x32_ocp_t x)
Definition scaled_type_convert.hpp:162
__host__ f8x2_ocp_t scaled_type_convert< f8x2_ocp_t, float2_t >(e8m0_bexp_t scale, float2_t x)
Definition scaled_type_convert.hpp:250
__host__ float2_t scaled_type_convert< float2_t, f8x2_ocp_t >(e8m0_bexp_t scale, f8x2_ocp_t x)
Definition scaled_type_convert.hpp:66
__host__ f8x32_ocp_t scaled_type_convert< f8x32_ocp_t, float32_t >(e8m0_bexp_t scale, float32_t x)
Definition scaled_type_convert.hpp:319
typename vector_type< bf8_ocp_t, 2 >::type bf8x2_ocp_t
Definition dtype_vector.hpp:2208
__host__ __device__ constexpr auto unpack(F &&f, X &&x)
Definition functional4.hpp:46
__host__ float2_t scaled_type_convert< float2_t, bf8x2_ocp_t >(e8m0_bexp_t scale, bf8x2_ocp_t x)
Definition scaled_type_convert.hpp:84
typename vector_type< float, 2 >::type float2_t
Definition dtype_vector.hpp:2145
typename vector_type< f4x2_pk_t, 1 >::type f4x2_t
Definition dtype_vector.hpp:2258
unsigned _BitInt(4) f4_t
Definition data_type.hpp:33
__host__ __device__ f4_t f4_convert_sr(float x, float scale=1.0f)
Definition utility/type_convert.hpp:1546
_BitInt(6) f6_t
Definition data_type.hpp:34
__host__ __device__ constexpr Y type_convert(X x)
Definition utility/type_convert.hpp:98
typename vector_type< bf6x32_pk_t, 1 >::type bf6x32_t
Definition dtype_vector.hpp:2273
typename vector_type< f8_ocp_t, 2 >::type f8x2_ocp_t
Definition dtype_vector.hpp:2200
typename vector_type< float, 32 >::type float32_t
Definition dtype_vector.hpp:2149
unsigned _BitInt(6) bf6_t
Definition data_type.hpp:35
typename vector_type< f6x32_pk_t, 1 >::type f6x32_t
Definition dtype_vector.hpp:2268
typename vector_type< f8_ocp_t, 16 >::type f8x16_ocp_t
Definition dtype_vector.hpp:2203
__host__ bf8_ocp_t scaled_type_convert< bf8_ocp_t, float >(e8m0_bexp_t scale, float x)
Definition scaled_type_convert.hpp:234
__host__ __device__ constexpr Y mxf8_convert_sr(X x, float scale)
typename vector_type< bf8_ocp_t, 16 >::type bf8x16_ocp_t
Definition dtype_vector.hpp:2211
typename vector_type< f4x2_pk_t, 16 >::type f4x32_t
Definition dtype_vector.hpp:2262
__host__ __device__ constexpr Y bit_cast(const X &x)
Definition type.hpp:306
__host__ __device__ f6_t f6_convert_sr(float x, float scale=1.0f)
Converts a float to the 6-bit floating-point type (f6_t) using stochastic rounding.
Definition utility/type_convert.hpp:1866
__host__ f8x16_ocp_t scaled_type_convert< f8x16_ocp_t, float16_t >(e8m0_bexp_t scale, float16_t x)
Definition scaled_type_convert.hpp:283
__host__ constexpr Y scaled_type_convert(e8m0_bexp_t scale, X x)
__host__ float32_t scaled_type_convert< float32_t, bf8x32_ocp_t >(e8m0_bexp_t scale, bf8x32_ocp_t x)
Definition scaled_type_convert.hpp:191
const GenericPointer< typename T::ValueType > T2 value
Definition pointer.h:1697
unsigned int uint32_t
Definition stdint.h:126
Definition amd_ck_fp8.hpp:369
data_type data
Definition amd_ck_fp8.hpp:371
Unsigned representation of a conventional biased Float32 exponent.
Definition utility/e8m0.hpp:26
Definition amd_ck_fp8.hpp:323
data_type data
Definition amd_ck_fp8.hpp:325
Definition functional2.hpp:33