25#ifndef _GLIBCXX_SIMD_MASK_H
26#define _GLIBCXX_SIMD_MASK_H 1
29#pragma GCC system_header
32#if __cplusplus >= 202400L
34#include "simd_iterator.h"
44#pragma GCC diagnostic push
45#pragma GCC diagnostic ignored "-Wpsabi"
47namespace std _GLIBCXX_VISIBILITY(default)
49_GLIBCXX_BEGIN_NAMESPACE_VERSION
// NOTE(review): this chunk appears garbled by extraction -- the original
// file's line numbers are fused into the text and several bodies/braces are
// elided.  Comments in this file describe only what is visible; verify
// against the pristine header before relying on them.
// Fragment of an index-permutation functor call operator (the enclosing
// struct declaration is elided; presumably _SwapNeighbors<_Np>, which is
// used with _S_static_permute further below -- TODO confirm).  Maps element
// index __i for a vector of __size elements; the visible branches dispatch
// on __size being a multiple of 2 * _Np, _Np being a power of two, and
// whether __i falls in the upper half of its 2 * _Np group.
52 template <
unsigned _Np>
56 operator()(
unsigned __i,
unsigned __size)
const
58 if (__size % (2 * _Np) != 0)
60 else if (std::has_single_bit(_Np))
62 else if (__i % (2 * _Np) >= _Np)
// __bitset_split<_Np>(bitset<_Mp>): splits a bitset into a low part of _Np
// bits (_M_lo) and a high part of _Mp - _Np bits (_M_hi), returned in an
// aggregate _Tmp (whose full definition is elided here).
// - If _Np is a whole number of words, the split is a __builtin_bit_cast of
//   the bitset's storage into a pair of smaller bitsets.
// - Otherwise it goes through to_ullong() and a shift, which requires the
//   whole bitset to fit in unsigned long long (static_assert below).
// The two static_asserts at the end validate the split at compile time.
69 template <
size_t _Np,
size_t _Mp>
71 __bitset_split(
const bitset<_Mp>& __b)
73 constexpr auto __bits_per_word = __CHAR_BIT__ * __SIZEOF_LONG__;
74 if constexpr (_Np % __bits_per_word == 0)
79 bitset<_Mp - _Np> _M_hi;
81 return __builtin_bit_cast(_Tmp, __b);
85 constexpr auto __bits_per_ullong = __CHAR_BIT__ * __SIZEOF_LONG_LONG__;
86 static_assert(_Mp <= __bits_per_ullong);
87 using _Lo = _Bitmask<_Np>;
88 using _Hi = _Bitmask<_Mp - _Np>;
94 return _Tmp {
static_cast<_Lo
>(__b.to_ullong()),
static_cast<_Hi
>(__b.to_ullong() >> _Np)};
// Compile-time sanity checks for the ullong-based split path.
98 static_assert(__bitset_split<64>(bitset<128>(1))._M_lo == bitset<64>(1));
99 static_assert(__bitset_split<64>(bitset<128>(1))._M_hi == bitset<64>(0));
// rebind<_Tp, _Vp>: maps a vec or mask type _Vp to the "similar" vec/mask
// with element type _Tp but the same element count and ABI tag.  The primary
// template (declared here, body elided) is specialized for vec and mask
// types; rebind_t is the usual convenience alias.
103 template <
typename _Tp,
typename _Vp, _ArchTraits _Traits = {}>
// Specialization for vec types: same size() and abi_type, new value_type.
112 template <__vectorizable _Tp, __simd_vec_type _Vp, _ArchTraits _Traits>
114 struct rebind<_Tp, _Vp, _Traits>
115 {
using type = __similar_vec<_Tp, _Vp::size(),
typename _Vp::abi_type>; };
// Specialization for mask types: analogous, via __similar_mask.
120 template <__vectorizable _Tp, __simd_mask_type _Mp, _ArchTraits _Traits>
122 struct rebind<_Tp, _Mp, _Traits>
123 {
using type = __similar_mask<_Tp, _Mp::size(),
typename _Mp::abi_type>; };
125 template <
typename _Tp,
typename _Vp>
126 using rebind_t =
typename rebind<_Tp, _Vp>::type;
// resize<_Np, _Vp>: maps a vec or mask type to the similar type with _Np
// elements but unchanged value_type and ABI family.  Primary template
// declared here (body elided); specialized for vec and mask below.
129 template <__simd_size_type _Np,
typename _Vp, _ArchTraits _Traits = {}>
133 template <__simd_
size_type _Np, __simd_vec_type _Vp, _ArchTraits _Traits>
136 struct resize<_Np, _Vp, _Traits>
137 {
using type = __similar_vec<typename _Vp::value_type, _Np, typename _Vp::abi_type>; };
// Mask specialization: rebinds the ABI tag to _Np elements via __abi_rebind
// and asserts the result stays in the same ABI variant (or is scalar).
139 template <__simd_
size_type _Np, __simd_mask_type _Mp, _ArchTraits _Traits>
142 struct resize<_Np, _Mp, _Traits>
144 using _A1 =
decltype(__abi_rebind<__mask_element_size<_Mp>, _Np,
typename _Mp::abi_type,
147 static_assert(__abi_tag<_A1>);
149 static_assert(_Mp::abi_type::_S_variant == _A1::_S_variant || __scalar_abi_tag<_A1>
150 || __scalar_abi_tag<typename _Mp::abi_type>);
152 using type = basic_mask<__mask_element_size<_Mp>, _A1>;
155 template <__simd_
size_type _Np,
typename _Vp>
156 using resize_t =
typename resize<_Np, _Vp>::type;
// Sentinel index for permute index maps: element may be left uninitialized
// (defined relative to zero_element, declared elsewhere in this header).
161 inline constexpr __simd_size_type uninit_element = zero_element + 1;
// permute(v, idxmap): static permutation of a vec/mask through a
// compile-time index-mapping callable.  _Np == 0 (default) keeps the source
// size; otherwise the result is resized to _Np elements.
164 template<__simd_size_type _Np = 0, __simd_vec_or_mask_type _Vp,
165 __index_permutation_function<_Vp> _IdxMap>
166 [[__gnu__::__always_inline__]]
167 constexpr resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>
168 permute(const _Vp& __v, _IdxMap&& __idxmap)
169 {
return resize_t<_Np == 0 ? _Vp::size() : _Np, _Vp>::_S_static_permute(__v, __idxmap); }
// permute(v, indices): dynamic permutation via a simd-integral index vector;
// delegates to the subscript-by-vector operator, result sized like indices.
172 template<__simd_vec_or_mask_type _Vp, __simd_
integral _Ip>
173 [[__gnu__::__always_inline__]]
174 constexpr resize_t<_Ip::size(), _Vp>
175 permute(
const _Vp& __v,
const _Ip& __indices)
176 {
return __v[__indices]; }
// chunk<_Vp>(x) / chunk<_Mp>(x): split a vec/mask into pieces of type
// _Vp/_Mp; the heavy lifting is the member _M_chunk.  Return type elided in
// this extraction (presumably an array/tuple of chunks -- see _M_chunk).
179 template<__simd_vec_type _Vp,
typename _Ap>
180 [[__gnu__::__always_inline__]]
182 chunk(
const basic_vec<typename _Vp::value_type, _Ap>& __x)
noexcept
183 {
return __x.template _M_chunk<_Vp>(); }
// Mask counterpart of the above.
185 template<__simd_mask_type _Mp,
typename _Ap>
186 [[__gnu__::__always_inline__]]
188 chunk(
const basic_mask<__mask_element_size<_Mp>, _Ap>& __x)
noexcept
189 {
return __x.template _M_chunk<_Mp>(); }
// chunk<_Np>(x): convenience overloads taking a chunk *size* instead of a
// chunk *type*; forward to the type-based overloads via resize_t.
191 template<__simd_
size_type _Np,
typename _Tp,
typename _Ap>
192 [[__gnu__::__always_inline__]]
194 chunk(
const basic_vec<_Tp, _Ap>& __x)
noexcept
195 ->
decltype(chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x))
196 {
return chunk<resize_t<_Np, basic_vec<_Tp, _Ap>>>(__x); }
198 template<__simd_
size_type _Np,
size_t _Bytes,
typename _Ap>
199 [[__gnu__::__always_inline__]]
201 chunk(
const basic_mask<_Bytes, _Ap>& __x)
noexcept
202 ->
decltype(chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x))
203 {
return chunk<resize_t<_Np, basic_mask<_Bytes, _Ap>>>(__x); }
// cat(x0, xs...): concatenate vecs (resp. masks) with identical value_type
// (resp. element byte size).  The result type has the summed element count,
// computed via resize_t on the first argument's type; the actual work is
// done by the result type's _S_concat.
206 template<
typename _Tp,
typename _A0,
typename... _Abis>
207 constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
208 cat(
const basic_vec<_Tp, _A0>& __x0,
const basic_vec<_Tp, _Abis>&... __xs)
noexcept
210 return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_vec<_Tp, _A0>>
211 ::_S_concat(__x0, __xs...);
// Mask counterpart of the above.
215 template<
size_t _Bytes,
typename _A0,
typename... _Abis>
216 constexpr resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
217 cat(
const basic_mask<_Bytes, _A0>& __x0,
const basic_mask<_Bytes, _Abis>&... __xs)
noexcept
219 return resize_t<(_A0::_S_size + ... + _Abis::_S_size), basic_mask<_Bytes, _A0>>
220 ::_S_concat(__x0, __xs...);
// Helpers for __extract_simd_at (bodies largely elided in this extraction):
// given a list of pack sizes, count how many whole packs at the front are
// entirely before __offset...
225 __packs_to_skip_at_front(
int __offset, initializer_list<int> __sizes)
229 for (
int __s : __sizes)
// ...and how many whole packs at the back lie entirely past the window
// [__offset, __offset + __max).  Returns packs-total minus packs-needed
// (__i presumably accumulated in the elided loop body -- TODO confirm).
240 __packs_to_skip_at_back(
int __offset,
int __max, initializer_list<int> __sizes)
244 for (
int __s : __sizes)
249 return int(__sizes.size()) - __i;
// Recursion terminators for __extract_simd_at: when the offset is 0 and the
// first argument already has the destination type _Dst, return it (body
// elided; __r is presumably returned directly).
256 template <
typename _Dst>
257 [[__gnu__::__always_inline__]]
259 __extract_simd_at(
auto _Offset,
const _Dst& __r,
const auto&...)
260 requires(_Offset.value == 0)
// Likewise when the offset exactly skips the first argument and the second
// argument is already a _Dst.
263 template <
typename _Dst,
typename _V0>
264 [[__gnu__::__always_inline__]]
266 __extract_simd_at(
auto _Offset,
const _V0&,
const _Dst& __r,
const auto&...)
267 requires(_Offset.value == _V0::size.value)
// __extract_simd_at<_Dst>(cw<offset>, xs...): extract a _Dst-sized window
// starting at element `offset` out of the logical concatenation of xs...
// (vecs or masks).  Heavily branched over register counts, bitmask vs.
// vector-mask representation, and argument shapes; several branch bodies are
// elided in this extraction, so the comments below only sketch the visible
// structure.  Uses C++26 pack indexing (_Vs...[i]), structured-binding packs
// ([...__is]) and `template for`.
270 template <
typename _Dst,
typename... _Vs>
271 [[__gnu__::__always_inline__]]
273 __extract_simd_at(
auto _Offset,
const _Vs&... __xs)
275 using _Adst =
typename _Dst::abi_type;
// Multi-register destination: recurse into low/high halves.
276 if constexpr (_Adst::_S_nreg >= 2)
278 using _Dst0 = remove_cvref_t<decltype(declval<_Dst>()._M_get_low())>;
279 using _Dst1 = remove_cvref_t<decltype(declval<_Dst>()._M_get_high())>;
280 return _Dst::_S_init(__extract_simd_at<_Dst0>(_Offset, __xs...),
281 __extract_simd_at<_Dst1>(_Offset + _Dst0::size, __xs...));
285 using _Ret = remove_cvref_t<decltype(declval<_Dst>()._M_get())>;
286 constexpr bool __use_bitmask = __simd_mask_type<_Dst> && _Adst::_S_is_bitmask;
287 constexpr int __dst_full_size = __bit_ceil(
unsigned(_Adst::_S_size));
288 constexpr int __nargs =
sizeof...(__xs);
289 using _Afirst =
typename _Vs...[0]::abi_type;
290 using _Alast =
typename _Vs...[__nargs - 1]::abi_type;
291 const auto& __x0 = __xs...[0];
292 const auto& __xlast = __xs...[__nargs - 1];
293 constexpr int __ninputs = (_Vs::size.value + ...);
// Drop input packs that lie wholly before/after the requested window and
// recurse with an adjusted offset.
294 if constexpr (_Offset.value >= _Afirst::_S_size
295 || __ninputs - _Offset.value - _Alast::_S_size >= _Adst::_S_size)
297 constexpr int __skip_front = __packs_to_skip_at_front(_Offset.value,
298 {_Vs::size.value...});
299 constexpr int __skip_back = __packs_to_skip_at_back(_Offset.value, _Adst::_S_size,
300 {_Vs::size.value...});
301 static_assert(__skip_front > 0 || __skip_back > 0);
302 constexpr auto [...__skip] = _IotaArray<__skip_front>;
303 constexpr auto [...__is] = _IotaArray<__nargs - __skip_front - __skip_back>;
304 constexpr int __new_offset = _Offset.value - (0 + ... + _Vs...[__skip]
::size.value);
305 return __extract_simd_at<_Dst>(cw<__new_offset>, __xs...[__is + __skip_front]...);
// Single-element destination: plain subscript.
307 else if constexpr (_Adst::_S_size == 1)
309 return _Dst(__x0[_Offset.value]);
// Flatten multi-register first/last inputs into their halves and recurse.
311 else if constexpr (_Afirst::_S_nreg >= 2 || _Alast::_S_nreg >= 2)
313 constexpr bool __flatten_first = _Afirst::_S_nreg >= 2;
314 constexpr bool __flatten_last = __nargs > 1 && _Alast::_S_nreg >= 2;
315 constexpr auto [...__is] = _IotaArray<__nargs - __flatten_first - __flatten_last>;
316 if constexpr (__flatten_first && __flatten_last)
317 return __extract_simd_at<_Dst>(
318 _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...,
319 __xlast._M_get_low(), __xlast._M_get_high());
320 else if constexpr (__flatten_first)
321 return __extract_simd_at<_Dst>(
322 _Offset, __x0._M_get_low(), __x0._M_get_high(), __xs...[__is + 1]...);
324 return __extract_simd_at<_Dst>(
325 _Offset, __xs...[__is]..., __xlast._M_get_low(), __xlast._M_get_high());
// Mask inputs in a different ABI variant: convert each to the destination
// variant first, then recurse.
327 else if constexpr (__simd_mask_type<_Dst>
328 && ((_Adst::_S_variant != _Vs::abi_type::_S_variant
329 && !__scalar_abi_tag<typename _Vs::abi_type>) || ...))
331 return __extract_simd_at<_Dst>(
332 _Offset,
static_cast<const resize_t<_Vs::size.value, _Dst
>&>(__xs)...);
// Single input: shift the bitmask or extract from the vector register.
337 else if constexpr (__nargs == 1)
339 if constexpr (__use_bitmask)
340 return _Dst(_Ret(__x0._M_to_uint() >> _Offset.value));
342 return _VecOps<_Ret>::_S_extract(__x0._M_concat_data(
false), _Offset);
// Bitmask destination, multiple inputs: accumulate shifted uint chunks.
344 else if constexpr (__use_bitmask)
346 static_assert(_Afirst::_S_nreg == 1);
347 static_assert(_Offset.value < _Afirst::_S_size);
348 int __offset = -_Offset.value;
350 template for (
const auto& __x : {__xs...})
353 __r = _Ret(__x._M_to_uint() >> -__offset);
354 else if (__offset < _Adst::_S_size)
355 __r |= _Ret(_Ret(__x._M_to_uint()) << __offset);
356 __offset += __x.size.value;
// Two inputs, first power-of-two-sized and at least as big: plain concat.
360 else if constexpr (__nargs == 2 && _Offset == 0 && _Adst::_S_nreg == 1
361 && _Afirst::_S_size >= _Alast::_S_size
362 && __has_single_bit(
unsigned(_Afirst::_S_size)))
364 if constexpr (_Afirst::_S_size == 1)
366 return _Ret{__x0._M_concat_data()[0], __xlast._M_concat_data()[0]};
369 const auto __v0 = __x0._M_concat_data();
370 const auto __v1 = __vec_zero_pad_to<sizeof(__v0)>(__xlast._M_concat_data());
371 return __vec_concat(__v0, __v1);
// Two inputs, second has one element: zero-pad and set the trailing lane.
374 else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
375 && _Afirst::_S_nreg == 1 && _Alast::_S_size == 1)
377 _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_get());
378 __vec_set(__r, _Afirst::_S_size, __xlast._M_concat_data()[0]);
// Two inputs, second has two elements: insert both, possibly as one wider
// lane via a bit-cast when alignment allows.
381 else if constexpr (__nargs == 2 && _Adst::_S_nreg == 1 && _Offset == 0
382 && _Afirst::_S_nreg == 1 && _Alast::_S_size == 2)
384 _Ret __r = __vec_zero_pad_to<sizeof(_Ret)>(__x0._M_concat_data());
385 const auto __x1 = __xlast._M_concat_data();
386 if constexpr (
sizeof(__x1) <=
sizeof(double) && (_Afirst::_S_size & 1) == 0)
388 using _Up = __conditional_t<
389 is_floating_point_v<__vec_value_type<_Ret>>,
390 __conditional_t<
sizeof(__x1) ==
sizeof(
double), double,
float>,
391 __integer_from<
sizeof(__x1)>>;
392 auto __r2 = __vec_bit_cast<_Up>(__r);
393 __vec_set(__r2, _Afirst::_S_size / 2, __vec_bit_cast<_Up>(__x1)[0]);
394 __r =
reinterpret_cast<_Ret
>(__r2);
398 __vec_set(__r, _Afirst::_S_size, __x1[0]);
399 __vec_set(__r, _Afirst::_S_size + 1, __x1[1]);
// Two single-register inputs: one __builtin_shufflevector does the window.
403 else if constexpr (__nargs == 2 && _Afirst::_S_nreg == 1 && _Alast::_S_nreg == 1)
405 constexpr auto [...__is] = _IotaArray<__dst_full_size>;
406 constexpr int __v2_offset = __width_of<
decltype(__x0._M_concat_data())>;
407 return __builtin_shufflevector(
408 __x0._M_concat_data(), __xlast._M_concat_data(), [](
int __i)
consteval {
409 if (__i < _Afirst::_S_size)
411 __i -= _Afirst::_S_size;
412 if (__i < _Alast::_S_size)
413 return __i + __v2_offset;
416 }(__is + _Offset.value)...);
// Constant-known or exact-sized inputs: concat in registers, then extract.
418 else if (__is_const_known(__xs...) || __ninputs == _Adst::_S_size)
420 return _VecOps<_Ret>::_S_extract(
421 __vec_concat_sized<__xs.size.value...>(__xs._M_concat_data(
false)...),
// Fallback: spill everything to an aligned buffer and memcpy the window out
// (masks are stored negated so lanes become all-ones/all-zeros).
426 alignas(_Ret) __vec_value_type<_Ret>
427 __tmp[
std::max(__ninputs, _Offset.value + __dst_full_size)] = {};
429 template for (
const auto& __x : {__xs...})
431 if constexpr (__simd_mask_type<_Dst>)
432 (-__x)._M_store(__tmp + __offset);
434 __x._M_store(__tmp + __offset);
435 __offset += __x.size.value;
438 __builtin_memcpy(&__r, __tmp + _Offset.value,
sizeof(_Ret));
// Disabled primary template of basic_mask (the class-head line itself is
// elided here): instantiating it with an invalid _Bytes/_Ap combination
// yields a type whose special members are all deleted with an explanatory
// message (C++26 `= delete("reason")`).
445 template <
size_t _Bytes,
typename _Ap>
449 using value_type = bool;
451 using abi_type = _Ap;
453#define _GLIBCXX_DELETE_SIMD "This specialization is disabled because of an invalid combination " \
454 "of template arguments to basic_mask."
456 basic_mask() =
delete(_GLIBCXX_DELETE_SIMD);
458 ~basic_mask() =
delete(_GLIBCXX_DELETE_SIMD);
460 basic_mask(
const basic_mask&) =
delete(_GLIBCXX_DELETE_SIMD);
462 basic_mask& operator=(
const basic_mask&) =
delete(_GLIBCXX_DELETE_SIMD);
// The macro is purely local to this class definition.
464#undef _GLIBCXX_DELETE_SIMD
// _MaskBase<_Bytes, _Ap> (class-head line elided): common base of the
// basic_mask specializations.  Provides iterator support, the static size,
// and deleted cross-size / vec<->mask conversions with diagnostic messages.
467 template <
size_t _Bytes,
typename _Ap>
470 using _Mp = basic_mask<_Bytes, _Ap>;
473 using _VecType = __simd_vec_from_mask_t<_Bytes, _Ap>;
475 static_assert(destructible<_VecType> || _Bytes >
sizeof(0ull));
478 using iterator = __iterator<_Mp>;
480 using const_iterator = __iterator<const _Mp>;
// begin(): iterator over the derived mask, starting at element 0 (the
// non-const overload's signature line is elided above this brace).
484 {
return {
static_cast<_Mp&
>(*this), 0}; }
486 constexpr const_iterator
487 begin() const noexcept
490 constexpr const_iterator
492 {
return {
static_cast<const _Mp&
>(*this), 0}; }
// end()/cend() return default_sentinel (bodies elided).
494 constexpr default_sentinel_t
498 constexpr default_sentinel_t
499 cend() const noexcept
502 static constexpr auto size = __simd_size_c<_Ap::_S_size>;
504 _MaskBase() =
default;
// Converting from a mask of a different element count is a size mismatch.
507 template <
size_t _UBytes,
typename _UAbi>
508 requires (_Ap::_S_size != _UAbi::_S_size)
510 _MaskBase(
const basic_mask<_UBytes, _UAbi>&) =
delete(
"size mismatch");
// There is deliberately no implicit vec -> mask conversion.
512 template <
typename _Up,
typename _UAbi>
514 _MaskBase(
const basic_vec<_Up, _UAbi>&)
515 =
delete(
"use operator! or a comparison to convert a vec into a mask");
// mask -> vec conversion with mismatched sizes is likewise deleted.
517 template <
typename _Up,
typename _UAbi>
518 requires (_Ap::_S_size != _UAbi::_S_size)
519 operator basic_vec<_Up, _UAbi>()
const
520 =
delete(
"size mismatch");
// basic_mask specialization for single-register ABIs (_S_nreg == 1).
// _DataType is either bool (scalar), an unsigned integer (bitmask ABIs), or
// a vector builtin (vector-mask ABIs); the static members below derive the
// layout facts from it.
523 template <
size_t _Bytes, __abi_tag _Ap>
524 requires (_Ap::_S_nreg == 1)
525 class basic_mask<_Bytes, _Ap>
526 : public _MaskBase<_Bytes, _Ap>
528 using _Base = _MaskBase<_Bytes, _Ap>;
530 using _VecType = _Base::_VecType;
532 template <
size_t,
typename>
533 friend class basic_mask;
535 template <
typename,
typename>
536 friend class basic_vec;
538 static constexpr int _S_size = _Ap::_S_size;
540 using _DataType =
typename _Ap::template _MaskDataType<_Bytes>;
542 static constexpr bool _S_has_bool_member = is_same_v<_DataType, bool>;
544 static constexpr bool _S_is_scalar = _S_has_bool_member;
546 static constexpr bool _S_use_bitmask = _Ap::_S_is_bitmask;
// Full (padded) element count of the underlying representation; elided
// branches presumably return 1 resp. __CHAR_BIT__ -- TODO confirm.
548 static constexpr int _S_full_size = [] {
549 if constexpr (_S_is_scalar)
551 else if constexpr (_S_use_bitmask && _S_size < __CHAR_BIT__)
554 return __bit_ceil(
unsigned(_S_size));
557 static constexpr bool _S_is_partial = _S_size != _S_full_size;
// Mask of the bits/lanes that carry real elements (excludes padding).
559 static constexpr _DataType _S_implicit_mask = [] {
560 if constexpr (_S_is_scalar)
562 else if (!_S_is_partial)
563 return _DataType(~_DataType());
564 else if constexpr (_S_use_bitmask)
565 return _DataType((_DataType(1) << _S_size) - 1);
568 constexpr auto [...__is] = _IotaArray<_S_full_size>;
569 return _DataType{ (__is < _S_size ? -1 : 0)... };
575 static constexpr size_t _S_padding_bytes = 0;
580 using value_type = bool;
582 using abi_type = _Ap;
584 using iterator = _Base::iterator;
586 using const_iterator = _Base::const_iterator;
// _S_init(_DataType): construct directly from the raw representation (body
// elided).
589 [[__gnu__::__always_inline__]]
590 static constexpr basic_mask
591 _S_init(_DataType __x)
// _S_init(bits): construct from an unsigned integer bit pattern; forwards
// to the unsigned-integral constructor below.
598 [[__gnu__::__always_inline__]]
599 static constexpr basic_mask
600 _S_init(unsigned_integral
auto __bits)
601 {
return basic_mask(__bits); }
// Accessor for the raw representation (name/body elided; presumably
// _M_get() returning _M_data -- TODO confirm).
603 [[__gnu__::__always_inline__]]
604 constexpr const _DataType&
// _S_recursive_bit_cast: reinterpret another mask's concatenated register
// data as this mask type (same total byte size assumed by bit_cast).
614 template <
size_t _UBytes,
typename _UAbi>
615 [[__gnu__::__always_inline__]]
616 static constexpr basic_mask
617 _S_recursive_bit_cast(
const basic_mask<_UBytes, _UAbi>& __x)
618 {
return __builtin_bit_cast(basic_mask, __x._M_concat_data()); }
// _M_concat_data: the raw data as a vector builtin; for scalar masks a
// 1-element vector of 0/-1, otherwise optionally sanitized by clearing
// padding lanes/bits via _S_implicit_mask.
620 [[__gnu__::__always_inline__]]
622 _M_concat_data(
bool __do_sanitize = _S_is_partial)
const
624 if constexpr (_S_is_scalar)
625 return __vec_builtin_type<__integer_from<_Bytes>, 1>{__integer_from<_Bytes>(-_M_data)};
628 if constexpr (_S_is_partial)
630 return _DataType(_M_data & _S_implicit_mask);
// _S_partial_mask_of_n(n): a mask with the first n elements true.
// Vector-mask path: compare an iota vector against n.  Bitmask path: use
// x86 BZHI when BMI2 is available, otherwise a shift-and-subtract (with
// explicit preconditions on n to avoid UB shifts).
640 template <_ArchTraits _Traits = {}>
641 [[__gnu__::__always_inline__]]
642 static constexpr basic_mask
643 _S_partial_mask_of_n(
int __n)
645 static_assert(!_S_is_scalar);
646 if constexpr (!_S_use_bitmask)
648 using _Ip = __integer_from<_Bytes>;
650 "_S_partial_mask_of_n without _S_use_bitmask requires "
651 "positive __n that does not overflow.");
652 constexpr _DataType __0123
653 = __builtin_bit_cast(_DataType, _IotaArray<_Ip(_S_full_size)>);
654 return basic_mask(__0123 < _Ip(__n));
658 __glibcxx_simd_precondition(__n >= 0 && __n <= 255,
659 "The x86 BZHI instruction requires __n to "
660 "only use bits 0:7");
661#if __has_builtin(__builtin_ia32_bzhi_si)
662 if constexpr (_S_size <= 32 && _Traits._M_have_bmi2())
663 return _S_init(_Bitmask<_S_size>(
664 __builtin_ia32_bzhi_si(~0u >> (32 - _S_size),
unsigned(__n))));
666#if __has_builtin(__builtin_ia32_bzhi_di)
667 else if constexpr (_S_size <= 64 && _Traits._M_have_bmi2())
668 return _S_init(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size),
unsigned(__n)));
670 if constexpr (_S_size <= 32)
672 __glibcxx_simd_precondition(__n < 32,
"invalid shift");
673 return _S_init(_Bitmask<_S_size>((1u <<
unsigned(__n)) - 1));
675 else if constexpr (_S_size <= 64)
677 __glibcxx_simd_precondition(__n < 64,
"invalid shift");
678 return _S_init((1ull <<
unsigned(__n)) - 1);
// Sizes above 64 are unsupported on this path.
681 static_assert(
false);
// Two in-place member operations (names elided) that AND resp. OR each
// element with its swapped neighbor: for bitmasks via the 0x5555... even/odd
// bit shuffle, for vector masks via _VecOps::_S_swap_neighbors.
685 [[__gnu__::__always_inline__]]
686 constexpr basic_mask&
689 if constexpr (_S_use_bitmask)
690 _M_data &= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
691 | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
693 _M_data &= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
// OR variant of the same neighbor combine.
697 [[__gnu__::__always_inline__]]
698 constexpr basic_mask&
701 if constexpr (_S_use_bitmask)
702 _M_data |= ((_M_data >> 1) & 0x5555'5555'5555'5555ull)
703 | ((_M_data << 1) & ~0x5555'5555'5555'5555ull);
705 _M_data |= _VecOps<_DataType>::_S_swap_neighbors(_M_data);
// _M_chunk<_Mp>(): split this mask into pieces of type _Mp.  Evenly
// divisible sizes yield an array<_Mp, n>; otherwise a tuple whose last
// element is a smaller remainder mask.  Pieces are cut out with
// __extract_simd_at at compile-time offsets.
709 template <
typename _Mp>
710 [[__gnu__::__always_inline__]]
711 constexpr auto _M_chunk() const noexcept
713 constexpr int __n = _S_size / _Mp::_S_size;
714 constexpr int __rem = _S_size % _Mp::_S_size;
715 constexpr auto [...__is] = _IotaArray<__n>;
716 if constexpr (__rem == 0)
717 return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *
this)...};
720 using _Rest = resize_t<__rem, _Mp>;
721 return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, *
this)...,
722 __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, *
this));
// _S_concat(x0): single-argument concatenation is the identity (body
// elided).
726 [[__gnu__::__always_inline__]]
727 static constexpr const basic_mask&
728 _S_concat(
const basic_mask& __x0)
noexcept
// _S_concat(xs...): concatenate several same-_Bytes masks whose sizes sum
// to _S_size, via __extract_simd_at at offset 0.
731 template <
typename... _As>
732 requires (
sizeof...(_As) > 1)
733 [[__gnu__::__always_inline__]]
734 static constexpr basic_mask
735 _S_concat(
const basic_mask<_Bytes, _As>&... __xs)
noexcept
737 static_assert(_S_size == (_As::_S_size + ...));
738 return __extract_simd_at<basic_mask>(cw<0>, __xs...);
// Constructors.  Default; raw _DataType round-trip (vector-mask ABIs only);
// bool broadcast; and the converting constructor from a mask with the same
// element count but different element size / ABI.
742 basic_mask() =
default;
745 [[__gnu__::__always_inline__]]
747 basic_mask(_DataType __x)
requires(!_S_is_scalar && !_S_use_bitmask)
751 [[__gnu__::__always_inline__]]
753 operator _DataType()
requires(!_S_is_scalar && !_S_use_bitmask)
// Broadcast a single bool to all elements (implicit mask for true).
757 [[__gnu__::__always_inline__]]
759 basic_mask(same_as<bool>
auto __x)
noexcept
760 : _M_data(__x ? _S_implicit_mask : _DataType())
// Converting constructor; explicitness decided per ABI/byte-size pair.
// The immediately-invoked lambda picks the cheapest conversion strategy:
// element-wise for scalar sources, via to_bitset when bitmasks are
// involved, bit_cast when only the ABI differs, x86 narrowing intrinsics
// for the 2-byte -> 1-byte runtime case, else a generic lane cast.
764 template <
size_t _UBytes,
typename _UAbi>
765 requires (_S_size == _UAbi::_S_size)
766 [[__gnu__::__always_inline__]]
767 constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
768 basic_mask(
const basic_mask<_UBytes, _UAbi>& __x) noexcept
769 : _M_data([&] [[__gnu__::__always_inline__]] {
770 using _UV = basic_mask<_UBytes, _UAbi>;
772 if constexpr (_S_is_scalar)
776 else if constexpr (_UV::_S_is_scalar)
778 constexpr auto [...__is] = _IotaArray<_S_size>;
779 if constexpr (_S_use_bitmask)
780 return ((_DataType(__x[__is]) << __is) | ...);
782 return _DataType{__vec_value_type<_DataType>(-__x[__is])...};
786 else if constexpr (_S_use_bitmask || _UV::_S_use_bitmask)
787 return basic_mask(__x.to_bitset())._M_data;
790 else if constexpr (_Bytes == _UBytes)
791 return _S_recursive_bit_cast(__x)._M_data;
797 if constexpr (_Bytes == 1 && _UBytes == 2)
798 if (!__is_const_known(__x))
800 if constexpr (_UAbi::_S_nreg == 1)
801 return __x86_cvt_vecmask<_DataType>(__x._M_data);
802 else if constexpr (_UAbi::_S_nreg == 2)
804 auto __lo = __x._M_data0._M_data;
805 auto __hi = __vec_zero_pad_to<sizeof(__lo)>(
806 __x._M_data1._M_concat_data());
807 return __x86_cvt_vecmask<_DataType>(__lo, __hi);
811 return __vec_mask_cast<_DataType>(__x._M_concat_data());
// Inherit the deleted mismatch constructors from _MaskBase.
816 using _Base::_MaskBase;
// Generator constructor: __gen(__simd_size_c<i>) produces element i.
// Scalar: one call; bitmask: OR the shifted bool results; vector mask:
// build a lane vector of 0/-1.
819 template <__simd_generator_invokable<
bool, _S_size> _Fp>
820 [[__gnu__::__always_inline__]]
822 basic_mask(_Fp&& __gen)
823 : _M_data([&] [[__gnu__::__always_inline__]] {
824 constexpr auto [...__is] = _IotaArray<_S_size>;
825 if constexpr (_S_is_scalar)
826 return __gen(__simd_size_c<0>);
827 else if constexpr (_S_use_bitmask)
828 return _DataType(((_DataType(__gen(__simd_size_c<__is>)) << __is)
831 return _DataType{__vec_value_type<_DataType>(
832 __gen(__simd_size_c<__is>) ? -1 : 0)...};
// Construct from bitset<_S_size>: route through the unsigned-integral
// constructor via to_ullong(), so _S_size must fit in unsigned long long.
837 [[__gnu__::__always_inline__]]
839 basic_mask(
const same_as<bitset<_S_size>>
auto& __b)
noexcept
840 : basic_mask(
static_cast<_Bitmask<_S_size>
>(__b.to_ullong()))
843 static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
// Construct from an unsigned integer bit pattern (bit i -> element i).
// Bitmask ABIs store it directly (branch body elided); scalar takes bit 0;
// constant-known values expand element-wise.  The runtime vector path
// broadcasts the value, tests per-lane power-of-two bits, and for wide
// 1-byte masks first spreads the pattern bytes across lanes (via a
// multiply-splat or a byte permute) before the bit test.
847 template <
unsigned_
integral _Tp>
848 requires (!same_as<_Tp, bool>)
849 [[__gnu__::__always_inline__]]
851 basic_mask(_Tp __val) noexcept
852 : _M_data([&] [[__gnu__::__always_inline__]] () {
853 if constexpr (_S_use_bitmask)
855 else if constexpr (_S_is_scalar)
856 return bool(__val & 1);
857 else if (__is_const_known(__val))
859 constexpr auto [...__is] = _IotaArray<_S_size>;
860 return _DataType {__vec_value_type<_DataType>((__val & (1ull << __is)) == 0
865 using _Ip =
typename _VecType::value_type;
866 _VecType __v0 = _Ip(__val);
867 constexpr int __bits_per_element =
sizeof(_Ip) * __CHAR_BIT__;
868 constexpr _VecType __pow2 = _VecType(cw<1>)
869 << (__iota<_VecType> % cw<__bits_per_element>);
870 if constexpr (_S_size < __bits_per_element)
871 return ((__v0 & __pow2) > cw<0>)._M_concat_data();
872 else if constexpr (_S_size == __bits_per_element)
873 return ((__v0 & __pow2) != cw<0>)._M_concat_data();
876 static_assert(_Bytes == 1);
877 static_assert(
sizeof(_Ip) == 1);
878 _Bitmask<_S_size> __bits = __val;
879 static_assert(
sizeof(_VecType) %
sizeof(__bits) == 0);
880 if constexpr (
sizeof(_DataType) == 32)
882 __vec_builtin_type<_UInt<8>, 4> __v1 = {
883 0xffu & (__bits >> (0 * __CHAR_BIT__)),
884 0xffu & (__bits >> (1 * __CHAR_BIT__)),
885 0xffu & (__bits >> (2 * __CHAR_BIT__)),
886 0xffu & (__bits >> (3 * __CHAR_BIT__)),
888 __v1 *= 0x0101'0101'0101'0101ull;
889 __v0 = __builtin_bit_cast(_VecType, __v1);
890 return ((__v0 & __pow2) != cw<0>)._M_data;
894 using _V1 = vec<_Ip,
sizeof(__bits)>;
895 _V1 __v1 = __builtin_bit_cast(_V1, __bits);
896 __v0 = _VecType::_S_static_permute(__v1, [](
int __i) {
897 return __i / __CHAR_BIT__;
899 return ((__v0 & __pow2) != cw<0>)._M_data;
// operator[]: read element __i as bool (bounds-checked precondition).
913 [[__gnu__::__always_inline__]]
915 operator[](__simd_size_type __i)
const
917 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size,
"subscript is out of bounds");
918 if constexpr (_S_is_scalar)
920 else if constexpr (_S_use_bitmask)
921 return bool((_M_data >> __i) & 1);
923 return _M_data[__i] & 1;
// operator!: element-wise negation via logical (scalar) or bitwise
// complement of the representation.
927 [[__gnu__::__always_inline__]]
929 operator!() const noexcept
931 if constexpr (_S_is_scalar)
932 return _S_init(!_M_data);
934 return _S_init(~_M_data);
// operator+: promote to the associated vec type (true -> 1, false -> 0).
937 [[__gnu__::__always_inline__]]
939 operator+() const noexcept requires destructible<_VecType>
940 {
return operator _VecType(); }
// operator-: vec with -1 for true, 0 for false.  For vector masks the
// representation already is 0/-1 lanes, so a bit_cast suffices.
945 [[__gnu__::__always_inline__]]
947 operator-() const noexcept requires destructible<_VecType>
949 using _Ip =
typename _VecType::value_type;
950 if constexpr (_S_is_scalar)
951 return _Ip(-
int(_M_data));
952 else if constexpr (_S_use_bitmask)
953 return __select_impl(*
this, _Ip(-1), _Ip());
956 static_assert(
sizeof(_VecType) ==
sizeof(_M_data));
957 return __builtin_bit_cast(_VecType, _M_data);
// operator~: vec with ~int(element), i.e. -2 for true, -1 for false;
// vector-mask path computes it as bit_cast(-1/0 lanes) - 1.
964 [[__gnu__::__always_inline__]]
966 operator~() const noexcept requires destructible<_VecType>
968 using _Ip =
typename _VecType::value_type;
969 if constexpr (_S_is_scalar)
970 return _Ip(~
int(_M_data));
971 else if constexpr (_S_use_bitmask)
972 return __select_impl(*
this, _Ip(-2), _Ip(-1));
975 static_assert(
sizeof(_VecType) ==
sizeof(_M_data));
976 return __builtin_bit_cast(_VecType, _M_data) - _Ip(1);
// Without a destructible _VecType, ~ is explicitly unavailable.
981 operator~() const noexcept = delete;
// Conversion to a same-size vec: true -> 1, false -> 0, implemented as a
// select between 1 and a zero vec; explicit when element sizes differ.
984 template <typename _Up, typename _UAbi>
985 requires (_UAbi::_S_size == _S_size)
986 [[__gnu__::__always_inline__]]
987 constexpr explicit(sizeof(_Up) != _Bytes)
988 operator basic_vec<_Up, _UAbi>() const noexcept
990 if constexpr (_S_is_scalar)
994 using _UV = basic_vec<_Up, _UAbi>;
995 return __select_impl(
static_cast<_UV::mask_type
>(*
this), _Up(1), _UV());
// Also inherit the deleted size-mismatch conversion from _MaskBase.
999 using _Base::operator basic_vec;
// to_bitset(): bitset with bit i == element i (body elided; limited to
// ullong width by the static_assert).
1002 [[__gnu__::__always_inline__]]
1003 constexpr bitset<_S_size>
1004 to_bitset() const noexcept
1007 static_assert(_S_size <= numeric_limits<unsigned long long>::digits);
// _M_to_uint<_Offset>(): the mask as an unsigned integer, shifted left by
// _Offset (function name line elided; referenced as _M_to_uint elsewhere).
// Scalar/bitmask: mask off padding and shift.  Runtime x86 vector masks:
// movmsk (with even-bit extraction or a byte-mask detour for 2-byte
// elements).  Constexpr fallback: select per-lane powers of two and reduce,
// chunking or horizontal-OR-compressing wide masks down to bytes first.
1018 template <
int _Offset = 0, _ArchTraits _Traits = {}>
1019 [[__gnu__::__always_inline__]]
1020 constexpr _Bitmask<_S_size + _Offset>
1023 constexpr int __nbits = _S_size;
1024 static_assert(__nbits + _Offset <= numeric_limits<unsigned long long>::digits);
1026 using _U0 = _Bitmask<__nbits>;
1028 using _Ur = _Bitmask<__nbits + _Offset>;
1029 if constexpr (_S_is_scalar || _S_use_bitmask)
1031 auto __bits = _M_data;
1032 if constexpr (_S_is_partial)
1033 __bits &= _S_implicit_mask;
1034 return _Ur(__bits) << _Offset;
// Runtime path: use the x86 move-mask instruction where possible.
1039 if (!__is_const_known(*
this))
1042 if constexpr (_Bytes != 2)
1043 __uint = _U0(__x86_movmsk(_M_data));
1044 else if constexpr (_Bytes == 2 && _Traits._M_have_bmi2())
1045 __uint = __bit_extract_even<__nbits>(__x86_movmsk(_M_data));
1046 else if constexpr (_Bytes == 2)
1047 return __similar_mask<char, __nbits, _Ap>(*this).template _M_to_uint<_Offset>();
1049 static_assert(
false);
1054 if constexpr (_S_is_partial)
1055 __uint &= (_U0(1) << _S_size) - 1;
1056 return _Ur(__uint) << _Offset;
// Constexpr path below: no intrinsics available.
1059 using _IV = _VecType;
1060 static_assert(destructible<_IV>);
1061 const typename _IV::mask_type& __k = [&] [[__gnu__::__always_inline__]] () {
1062 if constexpr (is_same_v<typename _IV::mask_type, basic_mask>)
1065 return typename _IV::mask_type(*
this);
1067 constexpr int __n = _IV::size();
1068 if constexpr (_Bytes * __CHAR_BIT__ >= __n)
1070 constexpr _IV __pow2 = _IV(cw<1>) << __iota<_IV>;
1071 return _Ur(_U0(__select_impl(__k, __pow2, _IV())
1072 ._M_reduce(bit_or<>()))) << _Offset;
1074 else if constexpr (__n % __CHAR_BIT__ != 0)
1076 constexpr int __n_lo = __n - __n % __CHAR_BIT__;
1077 const auto [__lo, __hi] = chunk<__n_lo>(__k);
1078 _Ur __bits = __hi.template _M_to_uint<_Offset + __n_lo>();
1079 return __bits | __lo.template _M_to_uint<_Offset>();
// Wide case: OR neighbor lanes together at distances 4/2/1 so every 8th
// lane holds its byte's bits, then compress those lanes to the front.
1083 constexpr _IV __pow2 = _IV(cw<1>) << (__iota<_IV> % _IV(cw<__CHAR_BIT__>));
1084 _IV __x = __select_impl(__k, __pow2, _IV());
1086 __x |= _IV::_S_static_permute(__x, _SwapNeighbors<4>());
1087 __x |= _IV::_S_static_permute(__x, _SwapNeighbors<2>());
1088 __x |= _IV::_S_static_permute(__x, _SwapNeighbors<1>());
1090 __x = _IV::_S_static_permute(__x, [](
int __i) {
1091 return __i * 8 < __n ? __i * 8 : uninit_element;
1094 _U0 __bits = __builtin_bit_cast(
1095 __similar_vec<_U0, __n * _Bytes /
sizeof(_U0), _Ap>, __x)[0];
1097 if constexpr (!__has_single_bit(
unsigned(__nbits)))
1098 __bits &= (_U0(1) << __nbits) - 1;
1099 return _Ur(__bits) << _Offset;
// unsigned long long view of the mask bits (member name line elided;
// presumably to_ullong -- it is called as to_ullong() in _M_reduce_count).
1104 [[__gnu__::__always_inline__]]
1105 constexpr unsigned long long
1107 {
return _M_to_uint(); }
// Element-wise operators.  Logical && / || intentionally coincide with
// bitwise & / | on the raw representation (no short-circuit per element).
1110 [[__gnu__::__always_inline__]]
1111 friend constexpr basic_mask
1112 operator&&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1113 {
return _S_init(__x._M_data & __y._M_data); }
1115 [[__gnu__::__always_inline__]]
1116 friend constexpr basic_mask
1117 operator||(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1118 {
return _S_init(__x._M_data | __y._M_data); }
1120 [[__gnu__::__always_inline__]]
1121 friend constexpr basic_mask
1122 operator&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1123 {
return _S_init(__x._M_data & __y._M_data); }
1125 [[__gnu__::__always_inline__]]
1126 friend constexpr basic_mask
1127 operator|(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1128 {
return _S_init(__x._M_data | __y._M_data); }
1130 [[__gnu__::__always_inline__]]
1131 friend constexpr basic_mask
1132 operator^(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1133 {
return _S_init(__x._M_data ^ __y._M_data); }
// Compound assignments mutate the raw data in place.
1136 [[__gnu__::__always_inline__]]
1137 friend constexpr basic_mask&
1138 operator&=(basic_mask& __x,
const basic_mask& __y)
noexcept
1140 __x._M_data &= __y._M_data;
1144 [[__gnu__::__always_inline__]]
1145 friend constexpr basic_mask&
1146 operator|=(basic_mask& __x,
const basic_mask& __y)
noexcept
1148 __x._M_data |= __y._M_data;
1152 [[__gnu__::__always_inline__]]
1153 friend constexpr basic_mask&
1154 operator^=(basic_mask& __x,
const basic_mask& __y)
noexcept
1156 __x._M_data ^= __y._M_data;
// Comparisons return element-wise masks, expressed through the boolean
// algebra of XOR / AND / OR / NOT on bool values.
1161 [[__gnu__::__always_inline__]]
1162 friend constexpr basic_mask
1163 operator==(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1164 {
return !(__x ^ __y); }
1166 [[__gnu__::__always_inline__]]
1167 friend constexpr basic_mask
1168 operator!=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1169 {
return __x ^ __y; }
1171 [[__gnu__::__always_inline__]]
1172 friend constexpr basic_mask
1173 operator>=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1174 {
return __x || !__y; }
1176 [[__gnu__::__always_inline__]]
1177 friend constexpr basic_mask
1178 operator<=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1179 {
return !__x || __y; }
1181 [[__gnu__::__always_inline__]]
1182 friend constexpr basic_mask
1183 operator>(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1184 {
return __x && !__y; }
1186 [[__gnu__::__always_inline__]]
1187 friend constexpr basic_mask
1188 operator<(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1189 {
return !__x && __y; }
// __select_impl(k, t, f): element-wise k ? t : f for three masks.  Vector
// masks use the per-lane ternary (with a sign test where the elided branch
// condition requires it -- TODO confirm); bitmasks use the classic
// (k & t) | (~k & f).
1192 [[__gnu__::__always_inline__]]
1193 friend constexpr basic_mask
1194 __select_impl(
const basic_mask& __k,
const basic_mask& __t,
const basic_mask& __f)
noexcept
1196 if constexpr (!_S_use_bitmask)
1203 return __k._M_data < 0 ? __t._M_data : __f._M_data;
1205 return __k._M_data ? __t._M_data : __f._M_data;
1208 return (__k._M_data & __t._M_data) | (~__k._M_data & __f._M_data);
// bool/bool special case: equal constants broadcast; otherwise the result
// is k itself or its negation.
1211 [[__gnu__::__always_inline__]]
1212 friend constexpr basic_mask
1213 __select_impl(
const basic_mask& __k, same_as<bool>
auto __t, same_as<bool>
auto __f)
noexcept
1216 return basic_mask(__t);
1218 return __t ? __k : !__k;
// Scalar/scalar overload producing a vec: broadcast both values and select.
1221 template <__vectorizable _T0, same_as<_T0> _T1>
1222 requires (
sizeof(_T0) == _Bytes)
1223 [[__gnu__::__always_inline__]]
1224 friend constexpr vec<_T0, _S_size>
1225 __select_impl(
const basic_mask& __k,
const _T0& __t,
const _T1& __f)
noexcept
1227 if constexpr (_S_is_scalar)
1228 return __k._M_data ? __t : __f;
1231 using _Vp = vec<_T0, _S_size>;
1232 using _Mp =
typename _Vp::mask_type;
1233 return __select_impl(_Mp(__k), _Vp(__t), _Vp(__f));
// Horizontal reductions: all/any/none of the (non-padding) elements.
// Bitmask: integer compare against the implicit mask / zero.  Runtime
// vector: x86 helpers; otherwise the generic _VecOps reduction.
1238 [[__gnu__::__always_inline__]]
1240 _M_all_of() const noexcept
1242 if constexpr (_S_is_scalar)
1244 else if constexpr (_S_use_bitmask)
1246 if constexpr (_S_is_partial)
1248 return (_M_data & _S_implicit_mask) == _S_implicit_mask;
1250 return _M_data == _S_implicit_mask;
1253 else if (!__is_const_known(_M_data))
1254 return __x86_vecmask_all<_S_size>(_M_data);
1257 return _VecOps<_DataType, _S_size>::_S_all_of(_M_data);
1260 [[__gnu__::__always_inline__]]
1262 _M_any_of() const noexcept
1264 if constexpr (_S_is_scalar)
1266 else if constexpr (_S_use_bitmask)
1268 if constexpr (_S_is_partial)
1270 return (_M_data & _S_implicit_mask) != 0;
1272 return _M_data != 0;
1275 else if (!__is_const_known(_M_data))
1276 return __x86_vecmask_any<_S_size>(_M_data);
1279 return _VecOps<_DataType, _S_size>::_S_any_of(_M_data);
1282 [[__gnu__::__always_inline__]]
1284 _M_none_of() const noexcept
1286 if constexpr (_S_is_scalar)
1288 else if constexpr (_S_use_bitmask)
1290 if constexpr (_S_is_partial)
1292 return (_M_data & _S_implicit_mask) == 0;
1294 return _M_data == 0;
1297 else if (!__is_const_known(_M_data))
1298 return __x86_vecmask_none<_S_size>(_M_data);
1301 return _VecOps<_DataType, _S_size>::_S_none_of(_M_data);
// _M_reduce_count(): number of true elements, via popcount on the uint
// representation.
1304 [[__gnu__::__always_inline__]]
1305 constexpr __simd_size_type
1306 _M_reduce_count() const noexcept
1308 if constexpr (_S_is_scalar)
1309 return int(_M_data);
1310 else if constexpr (_S_size <= numeric_limits<unsigned>::digits)
1311 return __builtin_popcount(_M_to_uint());
1313 return __builtin_popcountll(to_ullong());
// Index of the lowest true element; precondition: mask is non-empty.
1316 [[__gnu__::__always_inline__]]
1317 constexpr __simd_size_type
1318 _M_reduce_min_index()
const
1320 const auto __bits = _M_to_uint();
1321 __glibcxx_simd_precondition(__bits,
"An empty mask does not have a min_index.");
1322 if constexpr (_S_size == 1)
1325 return __countr_zero(__bits);
// Index of the highest true element; precondition: mask is non-empty.
1328 [[__gnu__::__always_inline__]]
1329 constexpr __simd_size_type
1330 _M_reduce_max_index()
const
1332 const auto __bits = _M_to_uint();
1333 __glibcxx_simd_precondition(__bits,
"An empty mask does not have a max_index.");
1334 if constexpr (_S_size == 1)
1337 return __highest_bit(__bits);
// __is_const_known(x): true when the compiler can see x's value as a
// constant (drives the intrinsic-vs-constexpr branch selection above).
1340 [[__gnu__::__always_inline__]]
1341 friend constexpr bool
1342 __is_const_known(
const basic_mask& __x)
1343 {
return __builtin_constant_p(__x._M_data); }
// Partial specialization of basic_mask for ABIs spanning more than one
// register (_S_nreg > 1): the mask is represented recursively as a pair of
// smaller masks — a low half of _N0 elements and a high half of _N1.
// NOTE(review): lossy extraction — access specifiers, braces and several
// lines are missing from this listing; verify against the upstream header.
1346 template <
size_t _Bytes, __abi_tag _Ap>
1347 requires (_Ap::_S_nreg > 1)
1348 class basic_mask<_Bytes, _Ap>
1349 : public _MaskBase<_Bytes, _Ap>
1351 using _Base = _MaskBase<_Bytes, _Ap>;
1353 using _VecType = _Base::_VecType;
1355 template <
size_t,
typename>
1356 friend class basic_mask;
1358 template <
typename,
typename>
1359 friend class basic_vec;
1361 static constexpr int _S_size = _Ap::_S_size;
// split geometry: _N0 = half of the next power of two, _N1 = the remainder;
// registers are split the same way
1363 static constexpr int _N0 = __bit_ceil(
unsigned(_S_size)) / 2;
1365 static constexpr int _N1 = _S_size - _N0;
1367 static constexpr int _Nreg0 = __bit_ceil(
unsigned(_Ap::_S_nreg)) / 2;
1369 static constexpr int _Nreg1 = _Ap::_S_nreg - _Nreg0;
// ABI tags and mask types of the two halves
1373 using _Abi0 =
decltype(_Ap::template _S_resize<_N0, _Nreg0>());
1375 using _Abi1 =
decltype(_Ap::template _S_resize<_N1, _Nreg1>());
1377 using _Mask0 = basic_mask<_Bytes, _Abi0>;
// the low half must be full and padding-free; only the tail may be partial
1380 static_assert(_Mask0::_S_padding_bytes == 0 && !_Mask0::_S_is_partial);
1382 using _Mask1 = basic_mask<_Bytes, _Abi1>;
1384 static constexpr bool _S_is_partial = _Mask1::_S_is_partial;
1388 static_assert(_Mask0::abi_type::_S_nreg + _Mask1::abi_type::_S_nreg == _Ap::_S_nreg);
// representation traits are inherited from the halves
1390 static constexpr bool _S_use_bitmask = _Mask0::_S_use_bitmask;
1392 static constexpr bool _S_is_scalar = _Mask0::_S_is_scalar;
1398 static constexpr bool _S_has_bool_member = _Mask1::_S_has_bool_member;
// padding introduced between/after the halves due to alignment differences
1403 static constexpr size_t _S_padding_bytes
1404 = (__alignof__(_Mask0) == __alignof__(_Mask1)
1405 ? 0 : __alignof__(_Mask0) - (
sizeof(_Mask1) % __alignof__(_Mask0)))
1406 + _Mask1::_S_padding_bytes;
1409 using value_type = bool;
1411 using abi_type = _Ap;
1413 using iterator = _Base::iterator;
1415 using const_iterator = _Base::const_iterator;
// _S_init: assemble a basic_mask from its two half-masks.
// NOTE(review): the function body (original lines 1420-1426) is missing from
// this extraction; verify upstream.
1417 [[__gnu__::__always_inline__]]
1418 static constexpr basic_mask
1419 _S_init(
const _Mask0& __x,
const _Mask1& __y)
1427 [[__gnu__::__always_inline__]]
1428 static constexpr basic_mask
1429 _S_init(unsigned_integral
auto __bits)
1430 {
return basic_mask(__bits); }
1432 template <
typename _U0,
typename _U1>
1433 [[__gnu__::__always_inline__]]
1434 static constexpr basic_mask
1435 _S_init(
const __trivial_pair<_U0, _U1>& __bits)
1437 if constexpr (is_unsigned_v<_U0>)
1439 static_assert(is_unsigned_v<_U1>);
1440 return _S_init(_Mask0(__bits._M_first), _Mask1(__bits._M_second));
1442 else if constexpr (is_unsigned_v<_U1>)
1443 return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1(__bits._M_second));
1445 return _S_init(_Mask0::_S_init(__bits._M_first), _Mask1::_S_init(__bits._M_second));
// Read-only accessors for the low and high half-masks.
// NOTE(review): the function-name lines (original 1450 and 1455) were lost
// in extraction; the names cannot be recovered from this listing — verify
// upstream.
1448 [[__gnu__::__always_inline__]]
1449 constexpr const _Mask0&
1451 {
return _M_data0; }
1453 [[__gnu__::__always_inline__]]
1454 constexpr const _Mask1&
1456 {
return _M_data1; }
1458 template <
size_t _UBytes,
typename _UAbi>
1459 [[__gnu__::__always_inline__]]
1460 static constexpr basic_mask
1461 _S_recursive_bit_cast(
const basic_mask<_UBytes, _UAbi>& __x)
1463 using _Mp = basic_mask<_UBytes, _UAbi>;
1464 if constexpr (_Mp::_S_has_bool_member ||
sizeof(basic_mask) >
sizeof(__x)
1465 || _Mp::_S_padding_bytes != 0)
1466 return _S_init(__builtin_bit_cast(_Mask0, __x._M_data0),
1467 _Mask1::_S_recursive_bit_cast(__x._M_data1));
1468 else if constexpr (
sizeof(basic_mask) ==
sizeof(__x))
1469 return __builtin_bit_cast(basic_mask, __x);
1472 struct _Tmp {
alignas(_Mp) basic_mask _M_data; };
1473 return __builtin_bit_cast(_Tmp, __x)._M_data;
// _M_concat_data: join the two halves' raw data into one value — a
// _Bitmask<_S_size> for bitmask ABIs, otherwise a concatenated vector.
// __do_sanitize asks the (possibly partial) high half to clear its unused
// lane bits first.
// NOTE(review): lossy extraction — the return-type line and braces are
// missing from this listing; verify upstream.
1477 [[__gnu__::__always_inline__]]
1479 _M_concat_data(
bool __do_sanitize = _S_is_partial)
const
1481 if constexpr (_S_use_bitmask)
1483 static_assert(_S_size <= numeric_limits<unsigned long long>::digits,
1484 "cannot concat more than 64 bits");
1485 using _Up = _Bitmask<_S_size>;
// low half occupies bits [0, _N0), high half is shifted up by _N0
1486 return _Up(_M_data0._M_concat_data() | (_Up(_M_data1._M_concat_data(__do_sanitize)) << _N0));
1490 auto __lo = _M_data0._M_concat_data();
// zero-extend the (possibly narrower) high half before vector concatenation
1491 auto __hi = __vec_zero_pad_to<sizeof(__lo)>(_M_data1._M_concat_data(__do_sanitize));
1492 return __vec_concat(__lo, __hi);
// _S_partial_mask_of_n: a mask whose first __n elements are true and the
// rest false. Uses the BMI2 BZHI builtin for bitmask ABIs when available;
// otherwise the result is built recursively from the two halves.
// NOTE(review): lossy extraction — the closing #endif, braces and at least
// one branch condition (guarding the return at original line 1511) are
// missing from this listing; verify upstream.
1496 template <_ArchTraits _Traits = {}>
1497 [[__gnu__::__always_inline__]]
1498 static constexpr basic_mask
1499 _S_partial_mask_of_n(
int __n)
1501#if __has_builtin(__builtin_ia32_bzhi_di)
// BZHI zeroes all bits above index __n in a single instruction
1502 if constexpr (_S_use_bitmask && _S_size <= 64 && _Traits._M_have_bmi2())
1503 return basic_mask(__builtin_ia32_bzhi_di(~0ull >> (64 - _S_size),
unsigned(__n)));
1505 if constexpr (_N0 == 1)
1507 static_assert(_S_size == 2);
1508 return _S_init(_Mask0(
true), _Mask1(
false));
// __n within the low half: recurse into _Mask0, high half all-false
1511 return _S_init(_Mask0::_S_partial_mask_of_n(__n), _Mask1(
false));
1512 else if (__n == _N0 || _N1 == 1)
1513 return _S_init(_Mask0(
true), _Mask1(
false));
// __n reaches into the high half: low half all-true, recurse into _Mask1
1514 else if constexpr (_N1 != 1)
1515 return _S_init(_Mask0(
true), _Mask1::_S_partial_mask_of_n(__n - _N0));
// In-place pairwise neighbor reductions, delegated to both halves.
// NOTE(review): lossy extraction — the function-name lines, braces and the
// 'return *this;' statements are missing; the names are inferred from the
// recursive calls (_M_and_neighbors / _M_or_neighbors). Verify upstream.
1518 [[__gnu__::__always_inline__]]
1519 constexpr basic_mask&
1522 _M_data0._M_and_neighbors();
1523 _M_data1._M_and_neighbors();
1527 [[__gnu__::__always_inline__]]
1528 constexpr basic_mask&
1531 _M_data0._M_or_neighbors();
1532 _M_data1._M_or_neighbors();
// _M_chunk: split this mask into pieces of mask type _Mp. Returns an
// array<_Mp, __n> when _Mp::_S_size divides _S_size evenly; otherwise a
// tuple whose last element is the remainder piece (resize_t<__rem, _Mp>).
// NOTE(review): lossy extraction — the return-type line, braces and the
// 'else' are missing from this listing; verify upstream.
1536 template <
typename _Mp>
1537 [[__gnu__::__always_inline__]]
1539 _M_chunk() const noexcept
1541 constexpr int __n = _S_size / _Mp::_S_size;
1542 constexpr int __rem = _S_size % _Mp::_S_size;
// structured binding of an index pack, expanded below to extract each piece
1543 constexpr auto [...__is] = _IotaArray<__n>;
1544 if constexpr (__rem == 0)
1545 return array<_Mp, __n>{__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>,
1546 _M_data0, _M_data1)...};
1549 using _Rest = resize_t<__rem, _Mp>;
1550 return tuple(__extract_simd_at<_Mp>(cw<_Mp::_S_size * __is>, _M_data0, _M_data1)...,
1551 __extract_simd_at<_Rest>(cw<_Mp::_S_size * __n>, _M_data0, _M_data1));
// _S_concat: concatenate masks (same element bytes) into this mask type.
// The variadic overload asserts the sizes add up and distributes the inputs
// over the two halves via __extract_simd_at.
// NOTE(review): lossy extraction — the single-argument overload's body
// (original lines 1558-1559) and several braces are missing; verify
// upstream.
1555 [[__gnu__::__always_inline__]]
1556 static constexpr basic_mask
1557 _S_concat(
const basic_mask& __x0)
noexcept
1560 template <
typename... _As>
1561 requires (
sizeof...(_As) >= 2)
1562 [[__gnu__::__always_inline__]]
1563 static constexpr basic_mask
1564 _S_concat(
const basic_mask<_Bytes, _As>&... __xs)
noexcept
1566 static_assert(_S_size == (_As::_S_size + ...));
// low half starts at element 0, high half at element _N0
1567 return _S_init(__extract_simd_at<_Mask0>(cw<0>, __xs...),
1568 __extract_simd_at<_Mask1>(cw<_N0>, __xs...));
// Constructors.
// NOTE(review): lossy extraction throughout this section — constructor
// bodies ({}), some declaration lines and the generator-lambda close are
// missing from this listing; verify against the upstream header.
1572 basic_mask() =
default;
// broadcast a single bool into both halves
1578 [[__gnu__::__always_inline__]]
1580 basic_mask(same_as<bool>
auto __x)
noexcept
1581 : _M_data0(__x), _M_data1(__x)
// converting constructor from a same-size mask with different element bytes
// and/or ABI; explicitness decided by __is_mask_conversion_explicit
1585 template <
size_t _UBytes,
typename _UAbi>
1586 requires (_S_size == _UAbi::_S_size)
1587 [[__gnu__::__always_inline__]]
1588 constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
1589 basic_mask(
const basic_mask<_UBytes, _UAbi>& __x) noexcept
// the two return-sequences below appear to be the initializers of _M_data0
// and _M_data1 (their lambda-introducer lines are missing): reuse __x's
// matching half, subscript for a single element, or chunk<_N0> otherwise
1591 if constexpr (_UAbi::_S_nreg > 1)
1593 return __x._M_data0;
1595 else if constexpr (_N0 == 1)
1596 return _Mask0(__x[0]);
1598 return get<0>(chunk<_N0>(__x));
1601 if constexpr (_UAbi::_S_nreg > 1)
1603 return __x._M_data1;
1605 else if constexpr (_N1 == 1)
1606 return _Mask1(__x[_N0]);
1608 return get<1>(chunk<_N0>(__x));
1612 using _Base::_MaskBase;
// generator constructor: __gen(i) produces element i; the high half shifts
// the generator index by _N0
1615 template <__simd_generator_invokable<
bool, _S_size> _Fp>
1616 [[__gnu__::__always_inline__]]
1618 basic_mask(_Fp&& __gen)
1619 : _M_data0(__gen), _M_data1([&] [[__gnu__::__always_inline__]] (auto __i) {
1620 return __gen(__simd_size_c<__i + _N0>);
// bitset constructor: split at _N0 and hand each part to the matching half
1625 [[__gnu__::__always_inline__]]
1627 basic_mask(
const same_as<bitset<_S_size>>
auto& __b)
noexcept
1628 : _M_data0(__bitset_split<_N0>(__b)._M_lo), _M_data1(__bitset_split<_N0>(__b)._M_hi)
// unsigned-integer constructor: low _N0 bits go to _M_data0; the rest are
// shifted down into _M_data1 when the source type is wide enough, otherwise
// the high half is value-initialized (all false)
1632 template <
unsigned_
integral _Tp>
1633 requires (!same_as<_Tp, bool>)
1634 [[__gnu__::__always_inline__]]
1636 basic_mask(_Tp __val) noexcept
1637 : _M_data0(
static_cast<_Bitmask<_N0>
>(__val)),
1638 _M_data1(
sizeof(_Tp) * __CHAR_BIT__ > _N0
1639 ?
static_cast<_Bitmask<_N1>
>(__val >> _N0) : _Bitmask<_N1>())
// Element access. A compile-time-known index picks the half directly;
// otherwise the stored representation is read in place through a may_alias
// pointer — per byte for bitmask ABIs, per element-sized integer for vector
// ABIs — to avoid extracting whole halves at runtime.
// NOTE(review): lossy extraction — braces and original lines 1651-1653 and
// 1660-1662 are missing from this listing; verify upstream.
1643 [[__gnu__::__always_inline__]]
1644 constexpr value_type
1645 operator[](__simd_size_type __i)
const
1647 __glibcxx_simd_precondition(__i >= 0 && __i < _S_size,
"subscript is out of bounds");
1648 if (__is_const_known(__i))
1649 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
1650 else if constexpr (_M_data1._S_has_bool_member)
1654 return __i < _N0 ? _M_data0[__i] : _M_data1[__i - _N0];
1655 else if constexpr (abi_type::_S_is_bitmask)
// read bit __i out of the raw byte storage
1657 using _AliasingByte [[__gnu__::__may_alias__]] =
unsigned char;
1658 return bool((
reinterpret_cast<const _AliasingByte*
>(
this)
1659 [__i / __CHAR_BIT__] >> (__i % __CHAR_BIT__)) & 1);
// vector ABI: each element is an _Bytes-sized integer, nonzero == true
1663 using _AliasingInt [[__gnu__::__may_alias__]] = __integer_from<_Bytes>;
1664 return reinterpret_cast<const _AliasingInt*
>(
this)[__i] != 0;
1669 [[__gnu__::__always_inline__]]
1670 constexpr basic_mask
1671 operator!() const noexcept
1672 {
return _S_init(!_M_data0, !_M_data1); }
// Unary +, - and ~ produce the associated vector type (where one is
// constructible), applying the operator to each half; ~ is deleted
// otherwise.
// NOTE(review): lossy extraction — the return-type lines (original 1675,
// 1683, 1691) are missing; from the bodies they presumably name _VecType.
1674 [[__gnu__::__always_inline__]]
1676 operator+() const noexcept requires destructible<_VecType>
1677 {
return _VecType::_S_concat(+_M_data0, +_M_data1); }
1682 [[__gnu__::__always_inline__]]
1684 operator-() const noexcept requires destructible<_VecType>
1685 {
return _VecType::_S_concat(-_M_data0, -_M_data1); }
1690 [[__gnu__::__always_inline__]]
1692 operator~() const noexcept requires destructible<_VecType>
1693 {
return _VecType::_S_concat(~_M_data0, ~_M_data1); }
1696 operator~() const noexcept = delete;
1699 template <typename _Up, typename _UAbi>
1700 requires (_UAbi::_S_size == _S_size)
1701 [[__gnu__::__always_inline__]]
1702 constexpr explicit(sizeof(_Up) != _Bytes)
1703 operator basic_vec<_Up, _UAbi>() const noexcept
1705 using _Rp = basic_vec<_Up, _UAbi>;
1706 return _Rp::_S_init(
static_cast<_Rp::_DataType0
>(_M_data0),
1707 static_cast<_Rp::_DataType1
>(_M_data1));
1710 using _Base::operator basic_vec;
// to_bitset: convert to std::bitset<_S_size>. For wide masks the two
// halves' bitsets are laid out in a struct and bit_cast to the result.
// NOTE(review): lossy extraction — the branch body for masks fitting in a
// ullong (original lines 1718-1725) and the struct declaration that __tmp
// closes are missing from this listing; verify upstream.
1713 [[__gnu__::__always_inline__]]
1714 constexpr bitset<_S_size>
1715 to_bitset() const noexcept
1717 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1726 } __tmp = {_M_data0.to_bitset(), _M_data1.to_bitset()};
1727 return __builtin_bit_cast(bitset<_S_size>, __tmp);
// _M_to_uint: pack the mask bits into an unsigned integer with each half's
// contribution shifted to its element offset; falls back to a
// __trivial_pair of the halves' packed values when one integer cannot hold
// all the bits (the _Offset == 0 static_assert path).
// NOTE(review): heavily lossy extraction — the return-type line, several
// branch conditions and the final return are missing; verify upstream.
1731 template <
int _Offset = 0, _ArchTraits _Traits = {}>
1732 [[__gnu__::__always_inline__]]
1736 constexpr int _N0x = _N0;
1739 static_assert(_Offset == 0);
1740 return __trivial_pair {
1741 _M_data0.template _M_to_uint<0>(),
1742 _M_data1.template _M_to_uint<0>()
// without BMI2, 2-byte elements across two registers go through a 1-byte
// mask of the same shape first
1748 if constexpr (_Bytes == 2 && !_Traits._M_have_bmi2() && _Ap::_S_nreg == 2
1750 return __similar_mask<char, _S_size, _Ap>(*this).template _M_to_uint<_Offset>();
1752 auto __uint = _M_data1.template _M_to_uint<_N0x + _Offset>();
1753 __uint |= _M_data0.template _M_to_uint<_Offset>();
// to_ullong: the packed mask bits as unsigned long long. For masks wider
// than 64 elements this is only valid when the high half is all-false
// (checked by the precondition).
// NOTE(review): lossy extraction — the function-name line (original 1760)
// and braces are missing; the name is inferred from the recursive
// _M_data0.to_ullong() call. Also note the two adjacent string literals
// below concatenate without a separating space ("...indiceshigher...") —
// likely a missing leading space in the second literal; flagging only, not
// changing program text in a doc-only edit.
1758 [[__gnu__::__always_inline__]]
1759 constexpr unsigned long long
1762 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1763 return _M_to_uint();
1766 __glibcxx_simd_precondition(_M_data1.to_ullong() == 0,
1767 "to_ullong called on mask with 'true' elements at indices"
1768 "higher than representable in a ullong");
1769 return _M_data0.to_ullong();
1774 [[__gnu__::__always_inline__]]
1775 friend constexpr basic_mask
1776 operator&&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1777 {
return _S_init(__x._M_data0 && __y._M_data0, __x._M_data1 && __y._M_data1); }
1779 [[__gnu__::__always_inline__]]
1780 friend constexpr basic_mask
1781 operator||(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1782 {
return _S_init(__x._M_data0 || __y._M_data0, __x._M_data1 || __y._M_data1); }
1784 [[__gnu__::__always_inline__]]
1785 friend constexpr basic_mask
1786 operator&(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1787 {
return _S_init(__x._M_data0 & __y._M_data0, __x._M_data1 & __y._M_data1); }
1789 [[__gnu__::__always_inline__]]
1790 friend constexpr basic_mask
1791 operator|(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1792 {
return _S_init(__x._M_data0 | __y._M_data0, __x._M_data1 | __y._M_data1); }
1794 [[__gnu__::__always_inline__]]
1795 friend constexpr basic_mask
1796 operator^(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1797 {
return _S_init(__x._M_data0 ^ __y._M_data0, __x._M_data1 ^ __y._M_data1); }
1800 [[__gnu__::__always_inline__]]
1801 friend constexpr basic_mask&
1802 operator&=(basic_mask& __x,
const basic_mask& __y)
noexcept
1804 __x._M_data0 &= __y._M_data0;
1805 __x._M_data1 &= __y._M_data1;
1809 [[__gnu__::__always_inline__]]
1810 friend constexpr basic_mask&
1811 operator|=(basic_mask& __x,
const basic_mask& __y)
noexcept
1813 __x._M_data0 |= __y._M_data0;
1814 __x._M_data1 |= __y._M_data1;
1818 [[__gnu__::__always_inline__]]
1819 friend constexpr basic_mask&
1820 operator^=(basic_mask& __x,
const basic_mask& __y)
noexcept
1822 __x._M_data0 ^= __y._M_data0;
1823 __x._M_data1 ^= __y._M_data1;
1828 [[__gnu__::__always_inline__]]
1829 friend constexpr basic_mask
1830 operator==(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1831 {
return !(__x ^ __y); }
1833 [[__gnu__::__always_inline__]]
1834 friend constexpr basic_mask
1835 operator!=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1836 {
return __x ^ __y; }
1838 [[__gnu__::__always_inline__]]
1839 friend constexpr basic_mask
1840 operator>=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1841 {
return __x || !__y; }
1843 [[__gnu__::__always_inline__]]
1844 friend constexpr basic_mask
1845 operator<=(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1846 {
return !__x || __y; }
1848 [[__gnu__::__always_inline__]]
1849 friend constexpr basic_mask
1850 operator>(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1851 {
return __x && !__y; }
1853 [[__gnu__::__always_inline__]]
1854 friend constexpr basic_mask
1855 operator<(
const basic_mask& __x,
const basic_mask& __y)
noexcept
1856 {
return !__x && __y; }
1859 [[__gnu__::__always_inline__]]
1860 friend constexpr basic_mask
1861 __select_impl(
const basic_mask& __k,
const basic_mask& __t,
const basic_mask& __f)
noexcept
1863 return _S_init(__select_impl(__k._M_data0, __t._M_data0, __f._M_data0),
1864 __select_impl(__k._M_data1, __t._M_data1, __f._M_data1));
// Blend with bool alternatives: when both alternatives agree the result is
// a broadcast of that value; otherwise the result is __k itself (__t true)
// or its negation (__t false).
// NOTE(review): lossy extraction — the condition guarding the first return
// (original lines 1870-1871, presumably testing __t == __f) and braces are
// missing; verify upstream.
1867 [[__gnu__::__always_inline__]]
1868 friend constexpr basic_mask
1869 __select_impl(
const basic_mask& __k, same_as<bool>
auto __t, same_as<bool>
auto __f)
noexcept
1872 return basic_mask(__t);
1874 return __t ? __k : !__k;
// Blend with scalar alternatives of a vectorizable type _T0 (same element
// size as the mask): convert the mask to the result vector's own mask type
// if they differ, then select per half.
// NOTE(review): lossy extraction — braces and the 'else' line (original
// 1886) are missing from this listing; verify upstream.
1877 template <__vectorizable _T0, same_as<_T0> _T1>
1878 requires (
sizeof(_T0) == _Bytes)
1879 [[__gnu__::__always_inline__]]
1880 friend constexpr vec<_T0, _S_size>
1881 __select_impl(
const basic_mask& __k,
const _T0& __t,
const _T1& __f)
noexcept
1883 using _Vp = vec<_T0, _S_size>;
1884 if constexpr (!is_same_v<basic_mask, typename _Vp::mask_type>)
1885 return __select_impl(
static_cast<_Vp::mask_type
>(__k), __t, __f);
1887 return _Vp::_S_init(__select_impl(__k._M_data0, __t, __f),
1888 __select_impl(__k._M_data1, __t, __f));
// Boolean reductions over both halves. When the halves have equal width
// they are first combined element-wise so only one reduction is needed.
// NOTE(review): lossy extraction — the return-type/name lines are missing;
// from the bodies these are _M_all_of, _M_any_of and _M_none_of. Verify
// upstream.
1891 template <_ArchTraits _Traits = {}>
1892 [[__gnu__::__always_inline__]]
1896 if constexpr (_N0 == _N1)
1897 return (_M_data0 && _M_data1)._M_all_of();
1899 return _M_data0._M_all_of() && _M_data1._M_all_of();
1902 template <_ArchTraits _Traits = {}>
1903 [[__gnu__::__always_inline__]]
1907 if constexpr (_N0 == _N1)
1908 return (_M_data0 || _M_data1)._M_any_of();
1910 return _M_data0._M_any_of() || _M_data1._M_any_of();
1913 template <_ArchTraits _Traits = {}>
1914 [[__gnu__::__always_inline__]]
1918 if constexpr (_N0 == _N1)
1919 return (_M_data0 || _M_data1)._M_none_of();
1921 return _M_data0._M_none_of() && _M_data1._M_none_of();
// Lowest set index. Masks that fit in a ullong pack their bits and use
// __countr_zero; wider masks recurse — search the low half unless it is
// empty, in which case search the high half and add its offset.
// Precondition: at least one element is true.
// NOTE(review): lossy extraction — the _S_size == 1 branch body and several
// braces are missing from this listing; verify upstream.
1924 [[__gnu__::__always_inline__]]
1925 constexpr __simd_size_type
1926 _M_reduce_min_index()
const
1928 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1930 const auto __bits = _M_to_uint();
1931 __glibcxx_simd_precondition(__bits,
"An empty mask does not have a min_index.");
1932 if constexpr (_S_size == 1)
1935 return __countr_zero(_M_to_uint());
1937 else if (_M_data0._M_none_of())
1938 return _M_data1._M_reduce_min_index() + _N0;
1940 return _M_data0._M_reduce_min_index();
// Highest set index. Masks that fit in a ullong pack their bits and use
// __highest_bit; wider masks recurse — search the high half unless it is
// empty, in which case search the low half.
// Precondition: at least one element is true.
// NOTE(review): lossy extraction — the _S_size == 1 branch body and several
// braces are missing from this listing; verify upstream.
1943 [[__gnu__::__always_inline__]]
1944 constexpr __simd_size_type
1945 _M_reduce_max_index()
const
1947 if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
1949 const auto __bits = _M_to_uint();
1950 __glibcxx_simd_precondition(__bits,
"An empty mask does not have a max_index.");
1951 if constexpr (_S_size == 1)
1954 return __highest_bit(_M_to_uint());
1956 else if (_M_data1._M_none_of())
1957 return _M_data0._M_reduce_max_index();
1959 return _M_data1._M_reduce_max_index() + _N0;
1962 [[__gnu__::__always_inline__]]
1963 friend constexpr bool
1964 __is_const_known(
const basic_mask& __x)
1965 {
return __is_const_known(__x._M_data0) && __is_const_known(__x._M_data1); }
1968_GLIBCXX_END_NAMESPACE_VERSION
1971#pragma GCC diagnostic pop
constexpr bool operator<=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator>=(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator<(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr bool operator>(const duration< _Rep1, _Period1 > &__lhs, const duration< _Rep2, _Period2 > &__rhs)
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
_Tp * end(valarray< _Tp > &__va) noexcept
Return an iterator pointing to one past the last element of the valarray.
_Tp * begin(valarray< _Tp > &__va) noexcept
Return an iterator pointing to the first element of the valarray.
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
ISO C++ entities toplevel namespace is std.
constexpr auto cend(const _Container &__cont) noexcept(noexcept(std::end(__cont))) -> decltype(std::end(__cont))
Return an iterator pointing to one past the last element of the const container.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
constexpr bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
constexpr auto cbegin(const _Container &__cont) noexcept(noexcept(std::begin(__cont))) -> decltype(std::begin(__cont))
Return an iterator pointing to the first element of the const container.
constexpr bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
constexpr bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
static constexpr int digits
static constexpr _Tp max() noexcept
static constexpr _Tp min() noexcept